From d6f543be340513bd4816b068a9d294d42bbe6ab8 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Tue, 22 Aug 2017 09:35:50 +0100 Subject: [PATCH 01/18] Handle fill value on netCDF save --- lib/iris/fileformats/netcdf.py | 202 ++++++++++++++++++++------------- 1 file changed, 121 insertions(+), 81 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index a2d87d2eed..2734a75c19 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -741,6 +741,23 @@ def _setncattr(variable, name, attribute): return variable.setncattr(name, attribute) +class _ValueCheckAndStoreTarget(object): + """ + To be used with da.store. Remembers whether any element was equal to a + given value before passing the chunk to the given target. + + """ + def __init__(self, target, check_value=None): + self.target = target + self.check_value = check_value + self.contains_value = False + + def __setitem__(self, keys, arr): + if self.check_value is not None: + self.contains_value |= self.check_value in arr + self.target[keys] = arr + + class Saver(object): """A manager for saving netcdf files.""" @@ -812,7 +829,7 @@ def __exit__(self, type, value, traceback): def write(self, cube, local_keys=None, unlimited_dimensions=None, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', - least_significant_digit=None, packing=None): + least_significant_digit=None, packing=None, fill_value=None): """ Wrapper for saving cubes to a NetCDF file. @@ -902,11 +919,16 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, on `cube.data` and possible masking. For masked data, `fill_value` is taken from netCDF4.default_fillvals. For more control, pass a dict with one or more of the following keys: `dtype` (required), - `scale_factor`, `add_offset`, and `fill_value`. Note that automatic - calculation of packing parameters will trigger loading of lazy - data; set them manually using a dict to avoid this. The default is - `None`, in which case the datatype is determined from the cube and - no packing will occur. + `scale_factor` and `add_offset`. Note that automatic calculation of + packing parameters will trigger loading of lazy data; set them + manually using a dict to avoid this. The default is `None`, in + which case the datatype is determined from the cube and no packing + will occur. + + * fill_value: + The value to use for the `_FillValue` attribute on the netCDF + variable. If `packing` is specified the value of `fill_value` + should be in the domain of the packed data. Returns: None. @@ -955,7 +977,8 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, cube, dimension_names, local_keys, zlib=zlib, complevel=complevel, shuffle=shuffle, fletcher32=fletcher32, contiguous=contiguous, chunksizes=chunksizes, endian=endian, - least_significant_digit=least_significant_digit, packing=packing) + least_significant_digit=least_significant_digit, packing=packing, + fill_value=fill_value) # Add coordinate variables. self._add_dim_coords(cube, dimension_names) @@ -1840,7 +1863,7 @@ def add_ellipsoid(ellipsoid): _setncattr(cf_var_cube, 'grid_mapping', cs.grid_mapping_name) def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, - **kwargs): + packing=None, fill_value=None, **kwargs): """ Create CF-netCDF data variable for the cube and any associated grid mapping. 
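The `_ValueCheckAndStoreTarget` helper introduced above is the core of this patch: it wraps the real netCDF variable so that, while `da.store` streams the lazy data out chunk by chunk, each chunk is also scanned for the fill value. A minimal standalone sketch of that pattern, with a plain NumPy array standing in for the netCDF variable (the class and variable names here are illustrative, not part of the patch):

```python
import dask.array as da
import numpy as np


class ValueCheckTarget(object):
    """Forward chunks to `target`, noting whether `check_value` ever appears."""
    def __init__(self, target, check_value=None):
        self.target = target
        self.check_value = check_value
        self.contains_value = False

    def __setitem__(self, keys, arr):
        # `arr` is one realised chunk; `keys` are the slices it belongs to.
        if self.check_value is not None:
            self.contains_value |= self.check_value in arr
        self.target[keys] = arr


lazy = da.from_array(np.arange(12.0).reshape(3, 4), chunks=(1, 4))
real_target = np.empty((3, 4))        # stands in for the netCDF variable
wrapper = ValueCheckTarget(real_target, check_value=7.0)
da.store([lazy], [wrapper])
print(wrapper.contains_value)         # True: 7.0 occurs in the data
```

Because the wrapper only intercepts `__setitem__`, the fill-value check rides along with the write itself and needs no separate pass over the data.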
@@ -1858,6 +1881,28 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, An interable of cube attribute keys. Any cube attributes with matching keys will become attributes on the data variable. + * packing (type or string or dict or list): A numpy integer datatype + (signed or unsigned) or a string that describes a numpy integer + dtype(i.e. 'i2', 'short', 'u4') or a dict of packing parameters as + described below. This provides support for netCDF data packing as + described in + http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values + If this argument is a type (or type string), appropriate values of + scale_factor and add_offset will be automatically calculated based + on `cube.data` and possible masking. For masked data, `fill_value` + is taken from netCDF4.default_fillvals. For more control, pass a + dict with one or more of the following keys: `dtype` (required), + `scale_factor` and `add_offset`. Note that automatic calculation of + packing parameters will trigger loading of lazy data; set them + manually using a dict to avoid this. The default is `None`, in + which case the datatype is determined from the cube and no packing + will occur. + + * fill_value: + The value to use for the `_FillValue` attribute on the netCDF + variable if the data is masked. If `packing` is specified the + value of `fill_value` should be in the domain of the packed data. + All other keywords are passed through to the dataset's `createVariable` method. @@ -1865,47 +1910,32 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, The newly created CF-netCDF data variable. """ - if 'packing' in kwargs: - packing = kwargs.pop('packing') - if packing: - scale_factor = None - add_offset = None - fill_value = None - if isinstance(packing, dict): - if 'dtype' not in packing: - msg = "The dtype attribute is required for packing." - raise ValueError(msg) - dtype = np.dtype(packing['dtype']) - if 'scale_factor' in packing: - scale_factor = packing['scale_factor'] - if 'add_offset' in packing: - add_offset = packing['add_offset'] - if 'fill_value' in packing: - fill_value = packing['fill_value'] + + if packing: + if isinstance(packing, dict): + if 'dtype' not in packing: + msg = "The dtype attribute is required for packing." 
+ raise ValueError(msg) + dtype = np.dtype(packing['dtype']) + scale_factor = packing.get('scale_factor', None) + add_offset = packing.get('add_offset', None) + else: + masked = ma.isMaskedArray(cube.data) + dtype = np.dtype(packing) + cmax = cube.data.max() + cmin = cube.data.min() + n = dtype.itemsize * 8 + if masked: + scale_factor = (cmax - cmin)/(2**n-2) else: - masked = ma.isMaskedArray(cube.data) - dtype = np.dtype(packing) - cmax = cube.data.max() - cmin = cube.data.min() - n = dtype.itemsize * 8 + scale_factor = (cmax - cmin)/(2**n-1) + if dtype.kind == 'u': + add_offset = cmin + elif dtype.kind == 'i': if masked: - scale_factor = (cmax - cmin)/(2**n-2) + add_offset = (cmax + cmin)/2 else: - scale_factor = (cmax - cmin)/(2**n-1) - if dtype.kind == 'u': - add_offset = cmin - elif dtype.kind == 'i': - if masked: - add_offset = (cmax + cmin)/2 - else: - add_offset = cmin + 2**(n-1)*scale_factor - needs_fill_value = (cube.has_lazy_data() or - ma.isMaskedArray(cube.data)) - if needs_fill_value and fill_value is None: - dtstr = dtype.str[1:] - fill_value = netCDF4.default_fillvals[dtstr] - else: - packing = None + add_offset = cmin + 2**(n-1)*scale_factor def set_packing_ncattrs(cfvar): """Set netCDF packing attributes.""" @@ -1924,36 +1954,42 @@ def set_packing_ncattrs(cfvar): self._dataset.file_format in ('NETCDF3_CLASSIC', 'NETCDF3_64BIT')): - if packing is None: - # Determine whether there is a cube MDI value. - fill_value = get_fill_value(cube.core_data()) - # Get the values in a form which is valid for the file format. data = self._ensure_valid_dtype(cube.data, 'cube', cube) - if packing is None: - dtype = data.dtype.newbyteorder('=') + def store(data, cf_var, fill_value): + cf_var[:] = data + return fill_value is not None and fill_value in data + else: + data = cube.lazy_data() - # Create the cube CF-netCDF data variable with data payload. - cf_var = self._dataset.createVariable( - cf_name, dtype, dimension_names, - fill_value=fill_value, **kwargs) - set_packing_ncattrs(cf_var) - cf_var[:] = data + def store(data, cf_var, fill_value): + # Store lazy data and check whether it contains the fill value + target = _ValueCheckAndStoreTarget(cf_var, fill_value) + da.store([data], [target]) + return target.contains_value - else: - # Create the cube CF-netCDF data variable. - if packing is None: - fill_value = get_fill_value(cube.core_data()) - dtype = cube.dtype.newbyteorder('=') + if not packing: + dtype = data.dtype.newbyteorder('=') - cf_var = self._dataset.createVariable( - cf_name, dtype, - dimension_names, fill_value=fill_value, - **kwargs) - set_packing_ncattrs(cf_var) + # Create the cube CF-netCDF data variable with data payload. + cf_var = self._dataset.createVariable(cf_name, dtype, dimension_names, + fill_value=fill_value, + **kwargs) + set_packing_ncattrs(cf_var) + + # If packing attributes are specified, don't bother checking whether + # the fill value is in the data. + fill_value_to_check = None if packing else \ + fill_value if fill_value is not None else \ + netCDF4.default_fillvals[data.dtype.str[1:]] + + # Store the data and check if it contains the fill value + contains_fill_value = store(data, cf_var, fill_value_to_check) - da.store([cube.lazy_data()], [cf_var]) + if contains_fill_value: + warnings.warn('Cube {} contains fill value {}. 
Some data points ' + 'will be masked.') if cube.standard_name: _setncattr(cf_var, 'standard_name', cube.standard_name) @@ -2040,7 +2076,7 @@ def _increment_name(self, varname): def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, unlimited_dimensions=None, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', - least_significant_digit=None, packing=None): + least_significant_digit=None, packing=None, fill_value=None): """ Save cube(s) to a netCDF file, given the cube and the filename. @@ -2144,16 +2180,19 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, This provides support for netCDF data packing as described in http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values If this argument is a type (or type string), appropriate values of - scale_factor and add_offset will be automatically calculated based on - `cube.data` and possible masking. For masked data, `fill_value` is - taken from netCDF4.default_fillvals. For more control, pass a dict with - one or more of the following keys: `dtype` (required), `scale_factor`, - `add_offset`, and `fill_value`. To save multiple cubes with different - packing parameters, pass an iterable of types, strings, dicts, or - `None`, one for each cube. Note that automatic calculation of packing - parameters will trigger loading of lazy data; set them manually using a - dict to avoid this. The default is `None`, in which case the datatype - is determined from the cube and no packing will occur. + scale_factor and add_offset will be automatically calculated based + on `cube.data` and possible masking. For masked data, `fill_value` + is taken from netCDF4.default_fillvals. For more control, pass a + dict with one or more of the following keys: `dtype` (required), + `scale_factor` and `add_offset`. Note that automatic calculation of + packing parameters will trigger loading of lazy data; set them manually + using a dict to avoid this. The default is `None`, in which case the + datatype is determined from the cube and no packing will occur. + + * fill_value: + The value to use for the `_FillValue` attribute on the netCDF variable. + If `packing` is specified the value of `fill_value` should be in the + domain of the packed data. Returns: None. @@ -2250,7 +2289,8 @@ def is_valid_packspec(p): for cube, packspec in zip(cubes, packspecs): sman.write(cube, local_keys, unlimited_dimensions, zlib, complevel, shuffle, fletcher32, contiguous, chunksizes, endian, - least_significant_digit, packing=packspec) + least_significant_digit, packing=packspec, + fill_value=fill_value) if iris.config.netcdf.conventions_override: # Set to the default if custom conventions are not available. From c5d9dc0cf6ed1c879455af962f17ccb7df555bf7 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Wed, 23 Aug 2017 10:07:09 +0100 Subject: [PATCH 02/18] Delete _FillValue after writing data if it wasn't masked. 
Rejig docstrings --- lib/iris/fileformats/netcdf.py | 102 +++++++++--------- .../unit/fileformats/netcdf/test_Saver.py | 3 +- 2 files changed, 51 insertions(+), 54 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 2734a75c19..fe6ce40f1d 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -741,20 +741,23 @@ def _setncattr(variable, name, attribute): return variable.setncattr(name, attribute) -class _ValueCheckAndStoreTarget(object): +class _FillValueMaskCheckAndStoreTarget(object): """ To be used with da.store. Remembers whether any element was equal to a - given value before passing the chunk to the given target. + given value and whether it was masked, before passing the chunk to the + given target. """ - def __init__(self, target, check_value=None): + def __init__(self, target, fill_value=None): self.target = target - self.check_value = check_value + self.fill_value = fill_value self.contains_value = False + self.is_masked = False def __setitem__(self, keys, arr): - if self.check_value is not None: - self.contains_value |= self.check_value in arr + if self.fill_value is not None: + self.contains_value |= self.fill_value in arr + self.is_masked |= ma.isMaskedArray(arr) self.target[keys] = arr @@ -916,9 +919,8 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values If this argument is a type (or type string), appropriate values of scale_factor and add_offset will be automatically calculated based - on `cube.data` and possible masking. For masked data, `fill_value` - is taken from netCDF4.default_fillvals. For more control, pass a - dict with one or more of the following keys: `dtype` (required), + on `cube.data` and possible masking. For more control, pass a dict + with one or more of the following keys: `dtype` (required), `scale_factor` and `add_offset`. Note that automatic calculation of packing parameters will trigger loading of lazy data; set them manually using a dict to avoid this. The default is `None`, in @@ -928,7 +930,9 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, * fill_value: The value to use for the `_FillValue` attribute on the netCDF variable. If `packing` is specified the value of `fill_value` - should be in the domain of the packed data. + should be in the domain of the packed data. If this argument is not + supplied and the data is masked, `fill_value` is taken from + netCDF4.default_fillvals Returns: None. @@ -1878,30 +1882,11 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, Kwargs: * local_keys (iterable of strings): - An interable of cube attribute keys. Any cube attributes - with matching keys will become attributes on the data variable. - - * packing (type or string or dict or list): A numpy integer datatype - (signed or unsigned) or a string that describes a numpy integer - dtype(i.e. 'i2', 'short', 'u4') or a dict of packing parameters as - described below. This provides support for netCDF data packing as - described in - http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values - If this argument is a type (or type string), appropriate values of - scale_factor and add_offset will be automatically calculated based - on `cube.data` and possible masking. For masked data, `fill_value` - is taken from netCDF4.default_fillvals. 
For more control, pass a - dict with one or more of the following keys: `dtype` (required), - `scale_factor` and `add_offset`. Note that automatic calculation of - packing parameters will trigger loading of lazy data; set them - manually using a dict to avoid this. The default is `None`, in - which case the datatype is determined from the cube and no packing - will occur. - + * see :func:`iris.fileformats.netcdf.Saver.write` + * packing (type or string or dict or list): + * see :func:`iris.fileformats.netcdf.Saver.write` * fill_value: - The value to use for the `_FillValue` attribute on the netCDF - variable if the data is masked. If `packing` is specified the - value of `fill_value` should be in the domain of the packed data. + * see :func:`iris.fileformats.netcdf.Saver.write` All other keywords are passed through to the dataset's `createVariable` method. @@ -1959,19 +1944,27 @@ def set_packing_ncattrs(cfvar): def store(data, cf_var, fill_value): cf_var[:] = data - return fill_value is not None and fill_value in data + is_masked = ma.isMaskedArray(data) + contains_value = fill_value is not None and fill_value in data + return is_masked, contains_value else: data = cube.lazy_data() def store(data, cf_var, fill_value): - # Store lazy data and check whether it contains the fill value - target = _ValueCheckAndStoreTarget(cf_var, fill_value) + # Store lazy data and check whether it is masked and contains + # the fill value + target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value) da.store([data], [target]) - return target.contains_value + return target.is_masked, target.contains_value if not packing: dtype = data.dtype.newbyteorder('=') + if fill_value is None: + fill_value = netCDF4.default_fillvals[dtype.str[1:]] + # Ensure it is the correct dtype + fill_value = dtype.type(fill_value) + # Create the cube CF-netCDF data variable with data payload. cf_var = self._dataset.createVariable(cf_name, dtype, dimension_names, fill_value=fill_value, @@ -1980,16 +1973,19 @@ def store(data, cf_var, fill_value): # If packing attributes are specified, don't bother checking whether # the fill value is in the data. - fill_value_to_check = None if packing else \ - fill_value if fill_value is not None else \ - netCDF4.default_fillvals[data.dtype.str[1:]] + fill_value_to_check = None if packing else fill_value - # Store the data and check if it contains the fill value - contains_fill_value = store(data, cf_var, fill_value_to_check) + # Store the data and check if it is masked and contains the fill value + is_masked, contains_fill_value = store(data, cf_var, + fill_value_to_check) - if contains_fill_value: - warnings.warn('Cube {} contains fill value {}. Some data points ' - 'will be masked.') + if is_masked: + if contains_fill_value: + warnings.warn("Cube '{}' contains fill value {}. Some data " + "points will be masked.".format(cube.name(), + fill_value)) + else: + cf_var.delncattr('_FillValue') if cube.standard_name: _setncattr(cf_var, 'standard_name', cube.standard_name) @@ -2181,18 +2177,18 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values If this argument is a type (or type string), appropriate values of scale_factor and add_offset will be automatically calculated based - on `cube.data` and possible masking. For masked data, `fill_value` - is taken from netCDF4.default_fillvals. 
For more control, pass a - dict with one or more of the following keys: `dtype` (required), - `scale_factor` and `add_offset`. Note that automatic calculation of - packing parameters will trigger loading of lazy data; set them manually - using a dict to avoid this. The default is `None`, in which case the - datatype is determined from the cube and no packing will occur. + on `cube.data` and possible masking. For more control, pass a dict with + one or more of the following keys: `dtype` (required), `scale_factor` + and `add_offset`. Note that automatic calculation of packing parameters + will trigger loading of lazy data; set them manually using a dict to + avoid this. The default is `None`, in which case the datatype is + determined from the cube and no packing will occur. * fill_value: The value to use for the `_FillValue` attribute on the netCDF variable. If `packing` is specified the value of `fill_value` should be in the - domain of the packed data. + domain of the packed data. If this argument is not supplied and the + data is masked, `fill_value` is taken from netCDF4.default_fillvals Returns: None. diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index 46621c892b..82f310f689 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -158,12 +158,13 @@ def test_big_endian(self): def test_zlib(self): cube = self._simple_cube('>f4') with mock.patch('iris.fileformats.netcdf.netCDF4') as api: + api.default_fillvals = {'f4': 12345.} with Saver('/dummy/path', 'NETCDF4') as saver: saver.write(cube, zlib=True) dataset = api.Dataset.return_value create_var_calls = mock.call.createVariable( 'air_pressure_anomaly', np.dtype('float32'), ['dim0', 'dim1'], - fill_value=None, shuffle=True, least_significant_digit=None, + fill_value=mock.ANY, shuffle=True, least_significant_digit=None, contiguous=False, zlib=True, fletcher32=False, endian='native', complevel=4, chunksizes=None).call_list() dataset.assert_has_calls(create_var_calls) From 1cfe7f89b647f261b9289411c7644bbada4ae4dc Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Thu, 24 Aug 2017 12:13:13 +0100 Subject: [PATCH 03/18] Modify fill_value/masking handling --- lib/iris/fileformats/netcdf.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index fe6ce40f1d..983c001d66 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -1960,11 +1960,6 @@ def store(data, cf_var, fill_value): if not packing: dtype = data.dtype.newbyteorder('=') - if fill_value is None: - fill_value = netCDF4.default_fillvals[dtype.str[1:]] - # Ensure it is the correct dtype - fill_value = dtype.type(fill_value) - # Create the cube CF-netCDF data variable with data payload. cf_var = self._dataset.createVariable(cf_name, dtype, dimension_names, fill_value=fill_value, @@ -1973,19 +1968,26 @@ def store(data, cf_var, fill_value): # If packing attributes are specified, don't bother checking whether # the fill value is in the data. 
- fill_value_to_check = None if packing else fill_value + fill_value_to_check = None if packing else \ + fill_value if fill_value is not None else \ + netCDF4.default_fillvals[dtype.str[1:]] # Store the data and check if it is masked and contains the fill value is_masked, contains_fill_value = store(data, cf_var, fill_value_to_check) - if is_masked: - if contains_fill_value: - warnings.warn("Cube '{}' contains fill value {}. Some data " - "points will be masked.".format(cube.name(), - fill_value)) - else: - cf_var.delncattr('_FillValue') + if dtype.itemsize == 1 and fill_value is None: + if is_masked: + warnings.warn("Cube '{}' contains masked byte data and will " + "be interpreted as unmasked. To save as masked " + "data please explicitly provide a fill value." + .format(cube.name())) + elif contains_fill_value: + warnings.warn("Cube '{}' contains data points equal to the fill" + "value {}. The points will be interpreted as being " + "masked. Please provide a fill_value argument not " + "equal to any data point.".format(cube.name(), + fill_value)) if cube.standard_name: _setncattr(cf_var, 'standard_name', cube.standard_name) From 1f234b3886b26c560a230169e3b848fe8de93425 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Thu, 24 Aug 2017 14:28:25 +0100 Subject: [PATCH 04/18] Allow fill_value to be a list --- lib/iris/fileformats/netcdf.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 983c001d66..7332979acf 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -2184,13 +2184,20 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, and `add_offset`. Note that automatic calculation of packing parameters will trigger loading of lazy data; set them manually using a dict to avoid this. The default is `None`, in which case the datatype is - determined from the cube and no packing will occur. + determined from the cube and no packing will occur. If this argument is + a list it must have the same number of elements as `cube` if `cube` is a + `:class:`iris.cube.CubeList`, or one element, and each element of this + argument will be applied to each cube separately. - * fill_value: + * fill_value (numeric or list): The value to use for the `_FillValue` attribute on the netCDF variable. If `packing` is specified the value of `fill_value` should be in the domain of the packed data. If this argument is not supplied and the - data is masked, `fill_value` is taken from netCDF4.default_fillvals + data is masked, `fill_value` is taken from netCDF4.default_fillvals. If + this argument is a list it must have the same number of elements as + `cube` if `cube` is a `:class:`iris.cube.CubeList`, or a single + element, and each element of this argument will be applied to each cube + separately. Returns: None. @@ -2281,10 +2288,24 @@ def is_valid_packspec(p): raise ValueError(msg) packspecs = packing + if isinstance(fill_value, six.string_types): + fill_values = repeat(fill_value) + else: + try: + fill_values = tuple(fill_value) + except TypeError: + fill_values = repeat(fill_value) + else: + if len(fill_values) != len(cubes): + msg = ('If fill_value is a list, it must have the ' + 'same number of elements as the argument to' + 'cube.') + raise ValueError(msg) + # Initialise Manager for saving with Saver(filename, netcdf_format) as sman: # Iterate through the cubelist. 
- for cube, packspec in zip(cubes, packspecs): + for cube, packspec, fill_value in zip(cubes, packspecs, fill_values): sman.write(cube, local_keys, unlimited_dimensions, zlib, complevel, shuffle, fletcher32, contiguous, chunksizes, endian, least_significant_digit, packing=packspec, From 4f5dadd5cc0e377c8dce7ba5e69116f26fe0a173 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Thu, 24 Aug 2017 14:31:53 +0100 Subject: [PATCH 05/18] Remove redundant get_fill_value function --- lib/iris/_lazy_data.py | 33 --------------------------------- lib/iris/fileformats/netcdf.py | 2 +- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 0da1c4c785..b413d8752a 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -131,39 +131,6 @@ def as_concrete_data(data): return data -def get_fill_value(data): - """ - Return the fill value of a concrete or lazy masked array. - - Args: - - * data: - A dask array, NumPy `ndarray` or masked array. - - Returns: - The fill value of `data` if `data` represents a masked array, or None. - - """ - # If lazy, get the smallest slice of the data from which we can retrieve - # the fill_value. - if is_lazy_data(data): - inds = [0] * data.ndim - if inds: - inds[-1] = slice(0, 1) - data = data[tuple(inds)] - data = as_concrete_data(data) - else: - if isinstance(data, ma.core.MaskedConstant): - data = ma.array(data.data, mask=data.mask) - - # Now get the fill value. - fill_value = None - if ma.isMaskedArray(data): - fill_value = data.fill_value - - return fill_value - - def multidim_lazy_stack(stack): """ Recursively build a multidimensional stacked dask array. diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 7332979acf..2bdbdca410 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -56,7 +56,7 @@ import iris.fileformats._pyke_rules import iris.io import iris.util -from iris._lazy_data import as_lazy_data, get_fill_value +from iris._lazy_data import as_lazy_data # Show Pyke inference engine statistics. DEBUG = False From 92d5f30e366538fcab4091ea4d30a57e395f4288 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Thu, 24 Aug 2017 14:40:42 +0100 Subject: [PATCH 06/18] Modify fill_value description in docstrings --- lib/iris/fileformats/netcdf.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 2bdbdca410..efc12ba825 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -930,9 +930,7 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, * fill_value: The value to use for the `_FillValue` attribute on the netCDF variable. If `packing` is specified the value of `fill_value` - should be in the domain of the packed data. If this argument is not - supplied and the data is masked, `fill_value` is taken from - netCDF4.default_fillvals + should be in the domain of the packed data. Returns: None. @@ -2192,12 +2190,10 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, * fill_value (numeric or list): The value to use for the `_FillValue` attribute on the netCDF variable. If `packing` is specified the value of `fill_value` should be in the - domain of the packed data. If this argument is not supplied and the - data is masked, `fill_value` is taken from netCDF4.default_fillvals. 
If - this argument is a list it must have the same number of elements as - `cube` if `cube` is a `:class:`iris.cube.CubeList`, or a single - element, and each element of this argument will be applied to each cube - separately. + domain of the packed data. If this argument is a list it must have the + same number of elements as `cube` if `cube` is a + `:class:`iris.cube.CubeList`, or a single element, and each element of + this argument will be applied to each cube separately. Returns: None. From 2aa21c46beee52df7094cbd54716900b6dcbfc0e Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Thu, 24 Aug 2017 13:44:10 +0100 Subject: [PATCH 07/18] Unskip and fix some tests --- lib/iris/tests/integration/test_netcdf.py | 16 ---------------- .../TestPackedData/multi_packed_multi_dtype.cdl | 1 - .../hybrid_height_cubes.cml | 4 ++-- .../tests/results/netcdf/netcdf_monotonic.cml | 6 +++--- .../results/netcdf/netcdf_save_hybrid_height.cdl | 1 - .../tests/results/netcdf/netcdf_save_multi_0.cdl | 1 - .../tests/results/netcdf/netcdf_save_multi_1.cdl | 1 - .../tests/results/netcdf/netcdf_save_multi_2.cdl | 1 - .../results/netcdf/netcdf_save_multiple.cdl | 3 --- .../netcdf/netcdf_save_ndim_auxiliary.cdl | 1 - .../tests/results/netcdf/netcdf_save_single.cdl | 1 - lib/iris/tests/results/netcdf/netcdf_units_1.cml | 2 +- .../tests/results/netcdf/uint32_data_netcdf3.cml | 2 +- lib/iris/tests/test_netcdf.py | 11 +---------- 14 files changed, 8 insertions(+), 43 deletions(-) diff --git a/lib/iris/tests/integration/test_netcdf.py b/lib/iris/tests/integration/test_netcdf.py index 4a2c03064f..c3f25b3127 100644 --- a/lib/iris/tests/integration/test_netcdf.py +++ b/lib/iris/tests/integration/test_netcdf.py @@ -111,7 +111,6 @@ def test_shared_primary(self): self.assertRaisesRegexp(ValueError, 'multiple aux factories'): iris.save(cube, filename) - @tests.skip_dask_mask def test_hybrid_height_cubes(self): hh1 = stock.simple_4d_with_hybrid_height() hh1.attributes['cube'] = 'hh1' @@ -211,20 +210,6 @@ def test_lazy_preserved_save(self): saver.write(acube) self.assertTrue(acube.has_lazy_data()) - @tests.skip_dask_mask - def test_lazy_mask_preserve_fill_value(self): - data = ma.array([0, 1], mask=[False, True]) - cube = iris.cube.Cube(data, fill_value=-1) - with self.temp_filename(suffix='.nc') as filename, \ - self.temp_filename(suffix='.nc') as other_filename: - iris.save(cube, filename, unlimited_dimensions=[]) - ncube = iris.load_cube(filename) - # Lazy save of the masked cube - iris.save(ncube, other_filename, unlimited_dimensions=[]) - ds = nc.Dataset(other_filename, 'r') - avar = ds['unknown'] - self.assertEqual(avar._FillValue, -1) - @tests.skip_data class TestCellMeasures(tests.IrisTest): @@ -428,7 +413,6 @@ def test_multi_packed_single_dtype(self): # Read PP input file. self._multi_test('multi_packed_single_dtype.cdl') - @tests.skip_dask_mask def test_multi_packed_multi_dtype(self): """Test saving multiple packed cubes with pack_dtype list.""" # Read PP input file. 
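The revised `save` docstring above documents that `fill_value` (like `packing`) may be given as a list with one entry per cube. A short usage sketch, assuming an Iris build that includes this patch series; the cube names and output path are placeholders:

```python
import numpy as np
import numpy.ma as ma

import iris
from iris.cube import Cube, CubeList

# Two small cubes, each with one masked point.
data = ma.masked_equal(np.arange(12, dtype='f4').reshape(3, 4), 5)
cubes = CubeList([Cube(data.copy(), long_name='temperature', units='K'),
                  Cube(data.copy(), long_name='anomaly', units='K')])

# One fill value per cube; a single scalar (or None) would apply to every cube.
iris.save(cubes, 'example.nc', fill_value=[-999.0, -1.0e30])
```

A list of the wrong length raises a `ValueError`, as exercised by the unit tests added later in this series.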
diff --git a/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl b/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl index ffc6763a04..34fcf2825b 100644 --- a/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl +++ b/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl @@ -46,7 +46,6 @@ variables: height:standard_name = "height" ; height:positive = "up" ; float precipitation_flux(time, latitude, longitude) ; - precipitation_flux:_FillValue = -1.e+30f ; precipitation_flux:standard_name = "precipitation_flux" ; precipitation_flux:units = "kg m-2 s-1" ; precipitation_flux:um_stash_source = "m01s05i216" ; diff --git a/lib/iris/tests/results/integration/netcdf/TestSaveMultipleAuxFactories/hybrid_height_cubes.cml b/lib/iris/tests/results/integration/netcdf/TestSaveMultipleAuxFactories/hybrid_height_cubes.cml index a2e9c63ebb..8f4d1ebc4a 100644 --- a/lib/iris/tests/results/integration/netcdf/TestSaveMultipleAuxFactories/hybrid_height_cubes.cml +++ b/lib/iris/tests/results/integration/netcdf/TestSaveMultipleAuxFactories/hybrid_height_cubes.cml @@ -1,6 +1,6 @@ - + @@ -64,7 +64,7 @@ - + diff --git a/lib/iris/tests/results/netcdf/netcdf_monotonic.cml b/lib/iris/tests/results/netcdf/netcdf_monotonic.cml index a59a629e9f..fd8dffdab3 100644 --- a/lib/iris/tests/results/netcdf/netcdf_monotonic.cml +++ b/lib/iris/tests/results/netcdf/netcdf_monotonic.cml @@ -1,6 +1,6 @@ - + @@ -18,7 +18,7 @@ - + @@ -36,7 +36,7 @@ - + diff --git a/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl b/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl index 5a57a8e6ea..f3e56065f1 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl @@ -6,7 +6,6 @@ dimensions: model_level_number = 10 ; variables: float air_potential_temperature(time, model_level_number, grid_latitude, grid_longitude) ; - air_potential_temperature:_FillValue = -1.073742e+09f ; air_potential_temperature:standard_name = "air_potential_temperature" ; air_potential_temperature:units = "K" ; air_potential_temperature:um_stash_source = "m01s00i004" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl index 8e8a021d9b..5fcbcc29cb 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(time, latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; air_temperature:um_stash_source = "m01s03i236" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl index b26b390659..eb1343fddb 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(time, latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; air_temperature:um_stash_source = "m01s03i236" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl index dc240568ed..40a700b5d3 100644 --- 
a/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float precipitation_flux(time, latitude, longitude) ; - precipitation_flux:_FillValue = -1.e+30f ; precipitation_flux:standard_name = "precipitation_flux" ; precipitation_flux:units = "kg m-2 s-1" ; precipitation_flux:um_stash_source = "m01s05i216" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl index 5d83b77c6d..10b0a62184 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(time, latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; air_temperature:um_stash_source = "m01s03i236" ; @@ -45,7 +44,6 @@ variables: height:standard_name = "height" ; height:positive = "up" ; float air_temperature_0(time, latitude, longitude) ; - air_temperature_0:_FillValue = -1.e+30f ; air_temperature_0:standard_name = "air_temperature" ; air_temperature_0:units = "K" ; air_temperature_0:um_stash_source = "m01s03i236" ; @@ -53,7 +51,6 @@ variables: air_temperature_0:grid_mapping = "latitude_longitude" ; air_temperature_0:coordinates = "forecast_period forecast_reference_time height" ; float precipitation_flux(time, latitude, longitude) ; - precipitation_flux:_FillValue = -1.e+30f ; precipitation_flux:standard_name = "precipitation_flux" ; precipitation_flux:units = "kg m-2 s-1" ; precipitation_flux:um_stash_source = "m01s05i216" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl b/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl index ee172ace82..790d2c37b7 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl @@ -5,7 +5,6 @@ dimensions: rlon = 174 ; variables: float pr(time, rlat, rlon) ; - pr:_FillValue = 1.e+30f ; pr:standard_name = "precipitation_flux" ; pr:long_name = "Precipitation" ; pr:units = "kg m-2 s-1" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_single.cdl b/lib/iris/tests/results/netcdf/netcdf_save_single.cdl index 6bee1a5589..f6ef6fc338 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_single.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_single.cdl @@ -4,7 +4,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; air_temperature:um_stash_source = "m01s03i236" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_units_1.cml b/lib/iris/tests/results/netcdf/netcdf_units_1.cml index 567ce1fa3a..7f4e700139 100644 --- a/lib/iris/tests/results/netcdf/netcdf_units_1.cml +++ b/lib/iris/tests/results/netcdf/netcdf_units_1.cml @@ -1,6 +1,6 @@ - + diff --git a/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml b/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml index 05ac5ec7a8..93da2db83d 100644 --- a/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml +++ b/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml @@ -1,6 +1,6 @@ - + diff --git a/lib/iris/tests/test_netcdf.py b/lib/iris/tests/test_netcdf.py index 347b114f09..6823ef9cf9 100644 --- a/lib/iris/tests/test_netcdf.py +++ b/lib/iris/tests/test_netcdf.py 
@@ -51,7 +51,6 @@ @tests.skip_data class TestNetCDFLoad(tests.IrisTest): - @tests.skip_dask_mask def test_monotonic(self): cubes = iris.load(tests.get_data_path( ('NetCDF', 'testing', 'test_monotonic_coordinate.nc'))) @@ -84,7 +83,6 @@ def test_missing_time_bounds(self): dataset.close() cube = iris.load_cube(filename, 'eastward_wind') - @tests.skip_dask_mask def test_load_global_xyzt_gems(self): # Test loading single xyzt CF-netCDF file (multi-cube). cubes = iris.load(tests.get_data_path(('NetCDF', 'global', 'xyz_t', @@ -96,7 +94,7 @@ def test_load_global_xyzt_gems(self): # manager loading. lnsp = cubes[1] self.assertTrue(ma.isMaskedArray(lnsp.data)) - self.assertEqual(-32767.0, lnsp.fill_value) + self.assertEqual(-32767.0, lnsp.data.fill_value) def test_load_global_xyzt_gems_iter(self): # Test loading stepped single xyzt CF-netCDF file (multi-cube). @@ -245,7 +243,6 @@ def test_deferred_loading(self): self.assertCML(cube[0][(0, 2), (1, 3)], ('netcdf', 'netcdf_deferred_mix_1.cml')) - @tests.skip_dask_mask def test_units(self): # Test exercising graceful cube and coordinate units loading. cube0, cube1 = sorted(iris.load(tests.get_data_path(('NetCDF', @@ -426,7 +423,6 @@ def test_netcdf_save_format(self): with self.assertRaises(ValueError): iris.save(cube, file_out, netcdf_format='WIBBLE') - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_single(self): # Test saving a single CF-netCDF file. @@ -445,7 +441,6 @@ def test_netcdf_save_single(self): # TODO investigate why merge now make time an AuxCoord rather than a # DimCoord and why forecast_period is 'preferred'. - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_multi2multi(self): # Test saving multiple CF-netCDF files. @@ -464,7 +459,6 @@ def test_netcdf_save_multi2multi(self): self.assertCDL(file_out, ('netcdf', 'netcdf_save_multi_%d.cdl' % index)) - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_multi2single(self): # Test saving multiple cubes to a single CF-netCDF file. @@ -565,7 +559,6 @@ def test_netcdf_multi_conflict_name_dup_coord(self): self.assertCDL( file_out, ('netcdf', 'multi_dim_coord_slightly_different.cdl')) - @tests.skip_dask_mask @tests.skip_data def test_netcdf_hybrid_height(self): # Test saving a CF-netCDF file which contains a hybrid height @@ -590,7 +583,6 @@ def test_netcdf_hybrid_height(self): self.assertCML(cube, ('netcdf', 'netcdf_save_load_hybrid_height.cml')) - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_ndim_auxiliary(self): # Test saving CF-netCDF with multi-dimensional auxiliary coordinates. 
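The expected-result changes above drop the `_FillValue` entries that earlier versions wrote automatically; with these patches, unmasked data saved without an explicit `fill_value` gains no `_FillValue` attribute at all. A round-trip sketch of that behaviour (assuming an Iris build containing this series; the path and cube name are placeholders):

```python
import os
import tempfile

import netCDF4 as nc
import numpy as np

import iris
from iris.cube import Cube

cube = Cube(np.arange(12, dtype='f4').reshape(3, 4),
            long_name='example', units='K')
path = os.path.join(tempfile.mkdtemp(), 'example.nc')
iris.save(cube, path)

ds = nc.Dataset(path)
# The data variable is named after the cube; no _FillValue is expected here.
print('_FillValue' in ds.variables['example'].ncattrs())   # expected: False
ds.close()
```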
@@ -864,7 +856,6 @@ def test_uint32_auxiliary_coord_netcdf3(self): 'uint32_auxiliary_coord_netcdf3.cml'), checksum=False) - @tests.skip_dask_mask def test_uint32_data_netcdf3(self): self.cube.data = self.cube.data.astype(np.uint32) with self.temp_filename(suffix='.nc') as filename: From 5d8375a2cae41dd7041073f9e472fade533dc1ae Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Thu, 24 Aug 2017 16:53:00 +0100 Subject: [PATCH 08/18] Remove get_fill_value tests and fix pep8 failure --- lib/iris/fileformats/netcdf.py | 6 +- .../unit/lazy_data/test_get_fill_value.py | 75 ------------------- 2 files changed, 3 insertions(+), 78 deletions(-) delete mode 100644 lib/iris/tests/unit/lazy_data/test_get_fill_value.py diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index efc12ba825..a11bd5ac72 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -2183,9 +2183,9 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, will trigger loading of lazy data; set them manually using a dict to avoid this. The default is `None`, in which case the datatype is determined from the cube and no packing will occur. If this argument is - a list it must have the same number of elements as `cube` if `cube` is a - `:class:`iris.cube.CubeList`, or one element, and each element of this - argument will be applied to each cube separately. + a list it must have the same number of elements as `cube` if `cube` is + a `:class:`iris.cube.CubeList`, or one element, and each element of + this argument will be applied to each cube separately. * fill_value (numeric or list): The value to use for the `_FillValue` attribute on the netCDF variable. diff --git a/lib/iris/tests/unit/lazy_data/test_get_fill_value.py b/lib/iris/tests/unit/lazy_data/test_get_fill_value.py deleted file mode 100644 index e83739f331..0000000000 --- a/lib/iris/tests/unit/lazy_data/test_get_fill_value.py +++ /dev/null @@ -1,75 +0,0 @@ -# (C) British Crown Copyright 2017, Met Office -# -# This file is part of Iris. -# -# Iris is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Iris is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with Iris. If not, see . -"""Test the function :func:`iris._lazy data.get_fill_value`.""" - -from __future__ import (absolute_import, division, print_function) -from six.moves import (filter, input, map, range, zip) # noqa - -# Import iris.tests first so that some things can be initialised before -# importing anything else. -import iris.tests as tests - -import dask.array as da -import numpy as np -import numpy.ma as ma - -from iris._lazy_data import as_lazy_data, get_fill_value - - -class Test_get_fill_value(tests.IrisTest): - def setUp(self): - # Array shape and fill-value. 
- spec = [((2, 3, 4, 5), -1), # 4d array - ((2, 3, 4), -2), # 3d array - ((2, 3), -3), # 2d array - ((2,), -4), # 1d array - ((), -5)] # 0d array - self.arrays = [np.empty(shape) for (shape, _) in spec] - self.masked = [ma.empty(shape, fill_value=fv) for (shape, fv) in spec] - self.lazy_arrays = [as_lazy_data(array) for array in self.arrays] - self.lazy_masked = [as_lazy_data(array) for array in self.masked] - # Add the masked constant case. - mc = ma.array([0], mask=True)[0] - self.masked.append(mc) - self.lazy_masked.append(as_lazy_data(mc)) - # Collect the expected fill-values. - self.expected_fill_values = [fv for (_, fv) in spec] - mc_fill_value = ma.masked_array(0, dtype=mc.dtype).fill_value - self.expected_fill_values.append(mc_fill_value) - - def test_arrays(self): - for array in self.arrays: - self.assertIsNone(get_fill_value(array)) - - def test_masked(self): - for array, expected in zip(self.masked, self.expected_fill_values): - result = get_fill_value(array) - self.assertEqual(result, expected) - - def test_lazy_arrays(self): - for array in self.lazy_arrays: - self.assertIsNone(get_fill_value(array)) - - def test_lazy_masked(self): - for array, expected in zip(self.lazy_masked, - self.expected_fill_values): - result = get_fill_value(array) - self.assertEqual(result, expected) - - -if __name__ == '__main__': - tests.main() From ca4067dbe4fad8bf5f416671f8598920dfbf911a Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Fri, 25 Aug 2017 13:32:09 +0100 Subject: [PATCH 09/18] Add unit tests --- lib/iris/fileformats/netcdf.py | 2 +- .../unit/fileformats/netcdf/test_Saver.py | 154 ++++++++++++++++++ .../test__FillValueMaskCheckAndStoreTarget.py | 104 ++++++++++++ .../unit/fileformats/netcdf/test_save.py | 94 ++++++++++- 4 files changed, 352 insertions(+), 2 deletions(-) create mode 100644 lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index a11bd5ac72..43c13c779a 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -1981,7 +1981,7 @@ def store(data, cf_var, fill_value): "data please explicitly provide a fill value." .format(cube.name())) elif contains_fill_value: - warnings.warn("Cube '{}' contains data points equal to the fill" + warnings.warn("Cube '{}' contains data points equal to the fill " "value {}. The points will be interpreted as being " "masked. Please provide a fill_value argument not " "equal to any data point.".format(cube.name(), diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index 82f310f689..a063d98137 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -24,10 +24,16 @@ # importing anything else. 
import iris.tests as tests +from contextlib import contextmanager +import re +import warnings + import netCDF4 as nc import numpy as np +from numpy import ma import iris +import iris._lazy_data from iris.coord_systems import (GeogCS, TransverseMercator, RotatedGeogCS, LambertConformal, Mercator, Stereographic, LambertAzimuthalEqualArea) @@ -326,6 +332,154 @@ def test_valid_max_saved(self): ds.close() +class Test_write_fill_value(tests.IrisTest): + def _make_cube(self, dtype, lazy=False, masked_value=None, + masked_index=None): + data = np.arange(12, dtype=dtype).reshape(3, 4) + if masked_value is not None: + data = ma.masked_equal(data, masked_value) + if masked_index is not None: + data = np.ma.masked_array(data) + data[masked_index] = ma.masked + if lazy: + data = iris._lazy_data.as_lazy_data(data) + lat = DimCoord(np.arange(3), 'latitude', units='degrees') + lon = DimCoord(np.arange(4), 'longitude', units='degrees') + return Cube(data, standard_name='air_temperature', units='K', + dim_coords_and_dims=[(lat, 0), (lon, 1)]) + + @contextmanager + def _netCDF_var(self, cube, **kwargs): + # Get the netCDF4 Variable for a cube from a temp file + standard_name = cube.standard_name + with self.temp_filename('.nc') as nc_path: + with Saver(nc_path, 'NETCDF4') as saver: + saver.write(cube, **kwargs) + ds = nc.Dataset(nc_path) + var, = [var for var in ds.variables.values() + if var.standard_name == standard_name] + yield var + + @contextmanager + def _warning_check(self, message_text='', expect_warning=True): + # Check that a warning is raised containing a given string, or that + # no warning containing a given string is raised. + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + yield + matches = (message_text in str(warning.message) for warning in w) + warning_raised = any(matches) + msg = "Warning containing text '{}' not raised." if expect_warning \ + else "Warning containing text '{}' unexpectedly raised." + self.assertEqual(expect_warning, warning_raised, + msg.format(message_text)) + + def test_fill_value(self): + # Test that a passed fill value is saved as a _FillValue attribute. + cube = self._make_cube('>f4') + fill_value = 12345. + with self._netCDF_var(cube, fill_value=fill_value) as var: + self.assertEqual(fill_value, var._FillValue) + + def test_default_fill_value(self): + # Test that if no fill value is passed then there is no _FillValue. + # attribute. + cube = self._make_cube('>f4') + with self._netCDF_var(cube) as var: + self.assertNotIn('_FillValue', var.ncattrs()) + + def test_mask_fill_value(self): + # Test that masked data saves correctly when given a fill value. + index = (1, 1) + fill_value = 12345. + cube = self._make_cube('>f4', masked_index=index) + with self._netCDF_var(cube, fill_value=fill_value) as var: + self.assertEqual(fill_value, var._FillValue) + self.assertTrue(var[index].mask) + + def test_mask_default_fill_value(self): + # Test that masked data saves correctly using the default fill value. + index = (1, 1) + cube = self._make_cube('>f4', masked_index=index) + with self._netCDF_var(cube) as var: + self.assertNotIn('_FillValue', var.ncattrs()) + self.assertTrue(var[index].mask) + + def test_mask_lazy_fill_value(self): + # Test that masked lazy data saves correctly when given a fill value. + index = (1, 1) + fill_value = 12345. 
+ cube = self._make_cube('>f4', masked_index=index, lazy=True) + with self._netCDF_var(cube, fill_value=fill_value) as var: + self.assertEqual(var._FillValue, fill_value) + self.assertTrue(var[index].mask) + + def test_mask_lazy_default_fill_value(self): + # Test that masked lazy data saves correctly when given a fill value. + index = (1, 1) + cube = self._make_cube('>f4', masked_index=index, lazy=True) + with self._netCDF_var(cube) as var: + self.assertNotIn('_FillValue', var.ncattrs()) + self.assertTrue(var[index].mask) + + def test_contains_fill_value_passed(self): + # Test that a warning is raised if the data contains the fill value. + cube = self._make_cube('>f4') + fill_value = 1 + with self._warning_check( + 'contains data points equal to the fill value'): + with self._netCDF_var(cube, fill_value=fill_value): + pass + + def test_contains_fill_value_byte(self): + # Test that a warning is raised if the data contains the fill value + # when it is of a byte type. + cube = self._make_cube('>i1') + fill_value = 1 + with self._warning_check( + 'contains data points equal to the fill value'): + with self._netCDF_var(cube, fill_value=fill_value): + pass + + def test_contains_default_fill_value(self): + # Test that a warning is raised if the data contains the default fill + # value if no fill_value argument is supplied. + cube = self._make_cube('>f4') + cube.data[0, 0] = nc.default_fillvals['f4'] + with self._warning_check( + 'contains data points equal to the fill value'): + with self._netCDF_var(cube): + pass + + def test_contains_default_fill_value_byte(self): + # Test that no warning is raised if the data contains the default fill + # value if no fill_value argument is supplied when the data is of a + # byte type. + cube = self._make_cube('>i1') + with self._warning_check( + 'contains data points equal to the fill value', False): + with self._netCDF_var(cube): + pass + + def test_contains_masked_fill_value(self): + # Test that no warning is raised if the data contains the fill_value at + # a masked point. + fill_value = 1 + cube = self._make_cube('>f4', masked_value=fill_value) + with self._warning_check( + 'contains data points equal to the fill value', False): + with self._netCDF_var(cube, fill_value=fill_value): + pass + + def test_masked_byte_default_fill_value(self): + # Test that a warning is raised when saving masked byte data with no + # fill value supplied. + cube = self._make_cube('>i1', masked_value=1) + with self._warning_check('contains masked byte data', True): + with self._netCDF_var(cube): + pass + + class _Common__check_attribute_compliance(object): def setUp(self): self.container = mock.Mock(name='container', attributes={}) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py b/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py new file mode 100644 index 0000000000..3d2d74e869 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py @@ -0,0 +1,104 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +""" +Unit tests for the `iris.fileformats.netcdf._FillValueMaskCheckAndStoreTarget` +class. + +""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import mock +import numpy as np + +from iris.fileformats.netcdf import _FillValueMaskCheckAndStoreTarget + + +class Test__FillValueMaskCheckAndStoreTarget(tests.IrisTest): + def _call_target(self, fill_value, keys, vals): + inner_target = mock.MagicMock() + target = _FillValueMaskCheckAndStoreTarget(inner_target, + fill_value=fill_value) + + for key, val in zip(keys, vals): + target[key] = val + + calls = [mock.call(key, val) for key, val in zip(keys, vals)] + inner_target.__setitem__.assert_has_calls(calls) + + return target + + def test___setitem__(self): + self._call_target(None, [1], [2]) + + def test_no_fill_value_not_masked(self): + # Test when the fill value is not present and the data is not masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.arange(5)] + fill_value = 16 + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + self.assertFalse(target.is_masked) + + def test_contains_fill_value_not_masked(self): + # Test when the fill value is present and the data is not masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.arange(5)] + fill_value = 5 + target = self._call_target(fill_value, keys, vals) + self.assertTrue(target.contains_value) + self.assertFalse(target.is_masked) + + def test_no_fill_value_masked(self): + # Test when the fill value is not present and the data is masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.ma.masked_equal(np.arange(5), 3)] + fill_value = 16 + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + self.assertTrue(target.is_masked) + + def test_contains_fill_value_masked(self): + # Test when the fill value is present and the data is masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.ma.masked_equal(np.arange(5), 3)] + fill_value = 5 + target = self._call_target(fill_value, keys, vals) + self.assertTrue(target.contains_value) + self.assertTrue(target.is_masked) + + def test_fill_value_None(self): + # Test when the fill value is None + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.arange(5)] + fill_value = None + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + + def test_contains_masked_fill_value(self): + # Test when the fill value is present but masked the data is masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.ma.masked_equal(np.arange(10, 15), 13)] + fill_value = 13 + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + self.assertTrue(target.is_masked) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_save.py b/lib/iris/tests/unit/fileformats/netcdf/test_save.py index a6e991b14e..b08c3b7137 100644 --- 
a/lib/iris/tests/unit/fileformats/netcdf/test_save.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_save.py @@ -28,7 +28,8 @@ import numpy as np import iris -from iris.cube import Cube +from iris.coords import DimCoord +from iris.cube import Cube, CubeList from iris.fileformats.netcdf import save, CF_CONVENTIONS_VERSION from iris.tests.stock import lat_lon_cube @@ -125,5 +126,96 @@ def test_no_unlimited_future_default(self): self.assertFalse(ds.dimensions['latitude'].isunlimited()) +class Test_fill_value(tests.IrisTest): + def setUp(self): + self.standard_names = ['air_temperature', + 'air_potential_temperature', + 'air_temperature_anomaly'] + + def _make_cubes(self): + lat = DimCoord(np.arange(3), 'latitude', units='degrees') + lon = DimCoord(np.arange(4), 'longitude', units='degrees') + data = np.arange(12, dtype='f4').reshape(3, 4) + return CubeList(Cube(data, standard_name=name, units='K', + dim_coords_and_dims=[(lat, 0), (lon, 1)]) + for name in self.standard_names) + + def test_None(self): + # Test that when no fill_value argument is passed, the fill_value + # argument to Saver.write is None or not present. + cubes = self._make_cubes() + with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cubes, 'dummy.nc') + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(3, write.call_count) + for call in write.mock_calls: + _, _, kwargs = call + if 'fill_value' in kwargs: + self.assertIs(None, kwargs['fill_value']) + + def test_single(self): + # Test that when a signle value is passed as the fill_value argument, + # that value is passed to each call to Saver.write + cubes = self._make_cubes() + fill_value = 12345. + with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cubes, 'dummy.nc', fill_value=fill_value) + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(3, write.call_count) + for call in write.mock_calls: + _, _, kwargs = call + self.assertEqual(fill_value, kwargs['fill_value']) + + def test_multiple(self): + # Test that when a list is passed as the fill_value argument, + # each element is passed to separate calls to Saver.write + cubes = self._make_cubes() + fill_values = [123., 456., 789.] + with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cubes, 'dummy.nc', fill_value=fill_values) + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(3, write.call_count) + for call, fill_value in zip(write.mock_calls, fill_values): + _, _, kwargs = call + self.assertEqual(fill_value, kwargs['fill_value']) + + def test_single_string(self): + # Test that when a string is passed as the fill_value argument, + # that value is passed to calls to Saver.write + cube = Cube(['abc', 'def', 'hij']) + fill_value = 'xyz' + with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cube, 'dummy.nc', fill_value=fill_value) + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(1, write.call_count) + _, _, kwargs = write.mock_calls[0] + self.assertEqual(fill_value, kwargs['fill_value']) + + def test_multi_wrong_length(self): + # Test that when a list of a different length to the number of cubes + # is passed as the fill_value argument, an error is raised + cubes = self._make_cubes() + fill_values = [1., 2., 3., 4.] 
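The call pattern these Test_fill_value cases exercise, written out as a sketch (the output file name is a placeholder; the cube construction mirrors _make_cubes above):

    import numpy as np

    import iris
    from iris.coords import DimCoord
    from iris.cube import Cube, CubeList

    lat = DimCoord(np.arange(3), 'latitude', units='degrees')
    lon = DimCoord(np.arange(4), 'longitude', units='degrees')
    names = ['air_temperature', 'air_potential_temperature',
             'air_temperature_anomaly']
    cubes = CubeList(Cube(np.arange(12, dtype='f4').reshape(3, 4),
                          standard_name=name, units='K',
                          dim_coords_and_dims=[(lat, 0), (lon, 1)])
                     for name in names)

    # One fill value per cube, applied in order.  A single value would be
    # used for every cube; a list of the wrong length raises ValueError.
    iris.save(cubes, 'example.nc', fill_value=[123., 456., 789.])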
+ with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + with self.assertRaises(ValueError): + save(cubes, 'dummy.nc', fill_value=fill_values) + + if __name__ == "__main__": tests.main() From 4e93e34d46b3cc7610eae1c66f13fdc9222940ea Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Fri, 25 Aug 2017 14:04:32 +0100 Subject: [PATCH 10/18] Short circuit mask and fill_value check --- lib/iris/fileformats/netcdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 43c13c779a..aa533f83e4 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -756,8 +756,8 @@ def __init__(self, target, fill_value=None): def __setitem__(self, keys, arr): if self.fill_value is not None: - self.contains_value |= self.fill_value in arr - self.is_masked |= ma.isMaskedArray(arr) + self.contains_value = self.contains_value or self.fill_value in arr + self.is_masked = self.is_masked or ma.isMaskedArray(arr) self.target[keys] = arr From 3ee3e24334902f0c0e6204291a4d60bfee60bca8 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Fri, 25 Aug 2017 15:07:45 +0100 Subject: [PATCH 11/18] Add comment and unpack ternary operators --- lib/iris/fileformats/netcdf.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index aa533f83e4..ce6d6c82cc 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -1903,6 +1903,8 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, scale_factor = packing.get('scale_factor', None) add_offset = packing.get('add_offset', None) else: + # We compute the scale_factor and add_offset based on the + # min/max of the data. This requires the data to be loaded. masked = ma.isMaskedArray(cube.data) dtype = np.dtype(packing) cmax = cube.data.max() @@ -1966,9 +1968,12 @@ def store(data, cf_var, fill_value): # If packing attributes are specified, don't bother checking whether # the fill value is in the data. - fill_value_to_check = None if packing else \ - fill_value if fill_value is not None else \ - netCDF4.default_fillvals[dtype.str[1:]] + if packing: + fill_value_to_check = None + elif fill_value is not None: + fill_value_to_check = fill_value + else: + fill_value_to_check = netCDF4.default_fillvals[dtype.str[1:]] # Store the data and check if it is masked and contains the fill value is_masked, contains_fill_value = store(data, cf_var, From 274462e1a3de92292e65849a14cae8102ce82e3f Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Tue, 29 Aug 2017 09:29:01 +0100 Subject: [PATCH 12/18] Raise error when invalid packing keys are specified --- lib/iris/fileformats/netcdf.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index ce6d6c82cc..fd37d7745e 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -1902,6 +1902,13 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, dtype = np.dtype(packing['dtype']) scale_factor = packing.get('scale_factor', None) add_offset = packing.get('add_offset', None) + valid_keys = {'dtype', 'scale_factor', 'add_offset'} + invalid_keys = set(packing.keys()) - valid_keys + if invalid_keys: + msg = ("Invalid packing key(s) found: '{}'. 
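The short-circuit change above is about skipping work once a flag is set. A standalone sketch of the difference, on toy data rather than Iris chunks:

    import numpy as np

    fill_value = 3
    chunks = [np.arange(5), np.arange(5, 10), np.arange(10, 15)]

    contains_value = False
    for chunk in chunks:
        # With `or`, `fill_value in chunk` is not evaluated once the flag is
        # already True, so later chunks are not scanned; `|=` would always
        # evaluate the right-hand side.
        contains_value = contains_value or (fill_value in chunk)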
The valid " + "keys are '{}'.".format("', '".join(invalid_keys), + "', '".join(valid_keys))) + raise ValueError(msg) else: # We compute the scale_factor and add_offset based on the # min/max of the data. This requires the data to be loaded. @@ -2299,8 +2306,7 @@ def is_valid_packspec(p): else: if len(fill_values) != len(cubes): msg = ('If fill_value is a list, it must have the ' - 'same number of elements as the argument to' - 'cube.') + 'same number of elements as the cube argument.') raise ValueError(msg) # Initialise Manager for saving From beb8efedf86d2900cae2e83117cac3375d00c6df Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Tue, 29 Aug 2017 10:30:25 +0100 Subject: [PATCH 13/18] Add what's new entries --- .../incompatiblechange_2017-Aug-29_fill_value_save_arg.txt | 1 + .../incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt create mode 100644 docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt diff --git a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt new file mode 100644 index 0000000000..2935e2af6e --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt @@ -0,0 +1 @@ +* When saving a cube or list of cubes, a fill value or list of fill values can be specified via a new `fill_value` argument. If a `fill_value` argument is not specified, the default fill value for the file format and the cube's data type will be used. Fill values are no longer taken from the cube's `data` attribute when it is a masked array. diff --git a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt new file mode 100644 index 0000000000..91742a9fb1 --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt @@ -0,0 +1,3 @@ +* A 'fill_value' key can no longer be specified as part of the `packing` argument to `iris.save` when saving in netCDF format. Instead, a fill value should be specified as a separate `fill_value` argument if required. 
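At the public API level, the validation added here constrains packing dicts to exactly these keys, with the fill value supplied separately. An illustrative call (the packing values are made up and lat_lon_cube is just a convenient stock cube):

    import iris
    from iris.tests.stock import lat_lon_cube

    cube = lat_lon_cube()
    packspec = dict(dtype='i2', scale_factor=0.01, add_offset=273.15)
    # Any key other than 'dtype', 'scale_factor' or 'add_offset' (for example
    # a 'fill_value' entry) now raises ValueError.  The fill value is passed
    # on its own and should lie in the packed (int16) domain.
    iris.save(cube, 'packed.nc', packing=packspec, fill_value=-999)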
+ +* A check is made that if the `packing` argument to `iris.save` is a dictionary then it contains no keys other than 'dtype', 'scale_factor' and 'add_offset' From d10294b9e78520a6826ee5a994448e5ba8f80e93 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Tue, 29 Aug 2017 16:16:09 +0100 Subject: [PATCH 14/18] Revert test --- lib/iris/tests/unit/fileformats/netcdf/test_Saver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index a063d98137..4c356a651f 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -170,7 +170,7 @@ def test_zlib(self): dataset = api.Dataset.return_value create_var_calls = mock.call.createVariable( 'air_pressure_anomaly', np.dtype('float32'), ['dim0', 'dim1'], - fill_value=mock.ANY, shuffle=True, least_significant_digit=None, + fill_value=None, shuffle=True, least_significant_digit=None, contiguous=False, zlib=True, fletcher32=False, endian='native', complevel=4, chunksizes=None).call_list() dataset.assert_has_calls(create_var_calls) From 745ddd2709a7070043ae5f4455776ddfa5ec8627 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Tue, 29 Aug 2017 16:28:49 +0100 Subject: [PATCH 15/18] Amend what's new entries --- .../incompatiblechange_2017-Aug-29_fill_value_save_arg.txt | 2 +- .../incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt index 2935e2af6e..d773ea87be 100644 --- a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt +++ b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt @@ -1 +1 @@ -* When saving a cube or list of cubes, a fill value or list of fill values can be specified via a new `fill_value` argument. If a `fill_value` argument is not specified, the default fill value for the file format and the cube's data type will be used. Fill values are no longer taken from the cube's `data` attribute when it is a masked array. +* When saving a cube or list of cubes, a fill value or list of fill values can be specified via a new `fill_value` argument. If a list is supplied, each fill value will be applied to each cube in turn. If a `fill_value` argument is not specified, the default fill value for the file format and the cube's data type will be used. Fill values are no longer taken from the cube's `data` attribute when it is a masked array. diff --git a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt index 91742a9fb1..c8c9f18584 100644 --- a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt +++ b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt @@ -1,3 +1,3 @@ -* A 'fill_value' key can no longer be specified as part of the `packing` argument to `iris.save` when saving in netCDF format. Instead, a fill value should be specified as a separate `fill_value` argument if required. 
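The reverted assertion above leans on how netCDF4 treats fill_value=None. A standalone check (the temporary file name is arbitrary):

    import netCDF4

    ds = netCDF4.Dataset('check.nc', 'w')
    ds.createDimension('x', 3)
    # fill_value=None (the library default) writes no _FillValue attribute;
    # unwritten points still receive the netCDF default fill value.
    var = ds.createVariable('v', 'f4', ('x',), fill_value=None)
    print('_FillValue' in var.ncattrs())   # False
    ds.close()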
+* A 'fill_value' key can no longer be specified as part of the `packing` argument to `iris.save` when saving in netCDF format. Instead, a fill value or list of fill values should be specified as a separate `fill_value` argument if required. -* A check is made that if the `packing` argument to `iris.save` is a dictionary then it contains no keys other than 'dtype', 'scale_factor' and 'add_offset' +* If the `packing` argument to `iris.save` is a dictionary, an error is raised if it contains any keys other than 'dtype', 'scale_factor' and 'add_offset'. From f119bf54d2817199b7cbffd0c68eeadfaecd2aa7 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Wed, 30 Aug 2017 08:59:57 +0100 Subject: [PATCH 16/18] Use ma.is_masked in place of ma.isMaskedArray. Add clarifying comments to argument validation --- lib/iris/fileformats/netcdf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index fd37d7745e..d569e3c914 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -757,7 +757,7 @@ def __init__(self, target, fill_value=None): def __setitem__(self, keys, arr): if self.fill_value is not None: self.contains_value = self.contains_value or self.fill_value in arr - self.is_masked = self.is_masked or ma.isMaskedArray(arr) + self.is_masked = self.is_masked or ma.is_masked(arr) self.target[keys] = arr @@ -1951,7 +1951,7 @@ def set_packing_ncattrs(cfvar): def store(data, cf_var, fill_value): cf_var[:] = data - is_masked = ma.isMaskedArray(data) + is_masked = ma.is_masked(data) contains_value = fill_value is not None and fill_value in data return is_masked, contains_value else: @@ -2296,7 +2296,9 @@ def is_valid_packspec(p): raise ValueError(msg) packspecs = packing + # Make fill-value(s) into an iterable over cubes. if isinstance(fill_value, six.string_types): + # Strings are awkward -- handle separately. fill_values = repeat(fill_value) else: try: From 703494c5bb942597bb5dfbc4f1d73242688f1c2f Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Wed, 30 Aug 2017 16:55:15 +0100 Subject: [PATCH 17/18] Minor tweaks. 
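The switch from ma.isMaskedArray to ma.is_masked above changes the question being asked: array type versus actual masked points. A quick illustration:

    import numpy.ma as ma

    arr = ma.masked_array([1, 2, 3])       # a MaskedArray with nothing masked
    ma.isMaskedArray(arr)                  # True  - it is the right type
    ma.is_masked(arr)                      # False - no element is masked
    ma.is_masked(ma.masked_equal(arr, 2))  # True  - one element is masked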
Revert test --- lib/iris/tests/unit/fileformats/netcdf/test_Saver.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index 4c356a651f..c75e130250 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -25,7 +25,6 @@ import iris.tests as tests from contextlib import contextmanager -import re import warnings import netCDF4 as nc @@ -33,7 +32,7 @@ from numpy import ma import iris -import iris._lazy_data +from iris._lazy_data import as_lazy_data from iris.coord_systems import (GeogCS, TransverseMercator, RotatedGeogCS, LambertConformal, Mercator, Stereographic, LambertAzimuthalEqualArea) @@ -164,7 +163,6 @@ def test_big_endian(self): def test_zlib(self): cube = self._simple_cube('>f4') with mock.patch('iris.fileformats.netcdf.netCDF4') as api: - api.default_fillvals = {'f4': 12345.} with Saver('/dummy/path', 'NETCDF4') as saver: saver.write(cube, zlib=True) dataset = api.Dataset.return_value @@ -342,7 +340,7 @@ def _make_cube(self, dtype, lazy=False, masked_value=None, data = np.ma.masked_array(data) data[masked_index] = ma.masked if lazy: - data = iris._lazy_data.as_lazy_data(data) + data = as_lazy_data(data) lat = DimCoord(np.arange(3), 'latitude', units='degrees') lon = DimCoord(np.arange(4), 'longitude', units='degrees') return Cube(data, standard_name='air_temperature', units='K', From c98585a6cef62116b86c2f5da62ec120ed18c1b7 Mon Sep 17 00:00:00 2001 From: Daniel Kirkham Date: Wed, 11 Oct 2017 12:12:07 +0100 Subject: [PATCH 18/18] Add import, update What's New, add test, and fix comments --- ...patiblechange_2017-Aug-29_fill_value_save_arg.txt | 2 +- lib/iris/fileformats/netcdf.py | 1 + lib/iris/tests/unit/fileformats/netcdf/test_Saver.py | 12 +++++++++++- lib/iris/tests/unit/fileformats/netcdf/test_save.py | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt index d773ea87be..d53450d50a 100644 --- a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt +++ b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt @@ -1 +1 @@ -* When saving a cube or list of cubes, a fill value or list of fill values can be specified via a new `fill_value` argument. If a list is supplied, each fill value will be applied to each cube in turn. If a `fill_value` argument is not specified, the default fill value for the file format and the cube's data type will be used. Fill values are no longer taken from the cube's `data` attribute when it is a masked array. +* When saving a cube or list of cubes in NetCDF format, a fill value or list of fill values can be specified via a new `fill_value` argument. If a list is supplied, each fill value will be applied to each cube in turn. If a `fill_value` argument is not specified, the default fill value for the file format and the cube's data type will be used. Fill values are no longer taken from the cube's `data` attribute when it is a masked array. 
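The lazy test cubes touched here follow a simple pattern; a sketch of the construction using the as_lazy_data helper imported above (coordinates omitted for brevity):

    import numpy as np
    from numpy import ma

    from iris._lazy_data import as_lazy_data
    from iris.cube import Cube

    data = ma.masked_array(np.arange(12, dtype='>f4').reshape(3, 4))
    data[1, 1] = ma.masked
    cube = Cube(as_lazy_data(data), standard_name='air_temperature', units='K')
    print(cube.has_lazy_data())   # True - realised only when written out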
diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index d569e3c914..76d44c90d6 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -48,6 +48,7 @@ from iris.aux_factory import HybridHeightFactory, HybridPressureFactory, \ OceanSigmaZFactory, OceanSigmaFactory, OceanSFactory, OceanSg1Factory, \ OceanSg2Factory +import iris.config import iris.coord_systems import iris.coords import iris.cube diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index c75e130250..3ab339b21e 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -413,7 +413,8 @@ def test_mask_lazy_fill_value(self): self.assertTrue(var[index].mask) def test_mask_lazy_default_fill_value(self): - # Test that masked lazy data saves correctly when given a fill value. + # Test that masked lazy data saves correctly using the default fill + # value. index = (1, 1) cube = self._make_cube('>f4', masked_index=index, lazy=True) with self._netCDF_var(cube) as var: @@ -477,6 +478,15 @@ def test_masked_byte_default_fill_value(self): with self._netCDF_var(cube): pass + def test_masked_byte_fill_value_passed(self): + # Test that no warning is raised when saving masked byte data with a + # fill value supplied if the the data does not contain the fill_value. + fill_value = 100 + cube = self._make_cube('>i1', masked_value=2) + with self._warning_check('contains masked byte data', False): + with self._netCDF_var(cube, fill_value=fill_value): + pass + class _Common__check_attribute_compliance(object): def setUp(self): diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_save.py b/lib/iris/tests/unit/fileformats/netcdf/test_save.py index b08c3b7137..6aa8dcffc0 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_save.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_save.py @@ -158,7 +158,7 @@ def test_None(self): self.assertIs(None, kwargs['fill_value']) def test_single(self): - # Test that when a signle value is passed as the fill_value argument, + # Test that when a single value is passed as the fill_value argument, # that value is passed to each call to Saver.write cubes = self._make_cubes() fill_value = 12345.
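The new byte-data test above boils down to this usage: masked byte data has no spare out-of-range value to use as a default fill, so one is passed explicitly. A sketch (the cube is a minimal stand-in and 100 is assumed absent from the data):

    import numpy as np
    from numpy import ma

    import iris
    from iris.cube import Cube

    data = ma.masked_equal(np.array([[1, 2], [3, 4]], dtype='>i1'), 2)
    cube = Cube(data, standard_name='air_temperature', units='K')

    # Supplying a fill value silences the "contains masked byte data" warning,
    # provided the chosen value does not occur in the data.
    iris.save(cube, 'bytes.nc', fill_value=100)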