diff --git a/lib/iris/_cube_coord_common.py b/lib/iris/_cube_coord_common.py index dd49355bf4..b147bd9411 100644 --- a/lib/iris/_cube_coord_common.py +++ b/lib/iris/_cube_coord_common.py @@ -33,8 +33,7 @@ class LimitedAttributeDict(dict): 'calendar', 'leap_month', 'leap_year', 'month_lengths', 'coordinates', 'grid_mapping', 'climatology', 'cell_methods', 'formula_terms', 'compress', - 'missing_value', 'add_offset', 'scale_factor', - '_FillValue') + 'missing_value', '_FillValue') def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 6e1d4d224d..9de33b1554 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -83,13 +83,14 @@ 'cell_measures', 'cell_methods', 'climatology', 'compress', 'coordinates', '_FillValue', 'formula_terms', 'grid_mapping', 'leap_month', 'leap_year', 'long_name', 'missing_value', - 'month_lengths', 'scale_factor', 'standard_error_multiplier', + 'month_lengths', 'standard_error_multiplier', 'scale_factor', 'standard_name', 'units'] # CF attributes that should not be global. -_CF_DATA_ATTRS = ['flag_masks', 'flag_meanings', 'flag_values', +_CF_DATA_ATTRS = ['add_offset', + 'flag_masks', 'flag_meanings', 'flag_values', 'instance_dimension', 'sample_dimension', - 'standard_error_multiplier'] + 'scale_factor', 'standard_error_multiplier'] # CF attributes that should only be global. _CF_GLOBAL_ATTRS = ['conventions', 'featureType', 'history', 'title'] @@ -776,13 +777,13 @@ def __exit__(self, type, value, traceback): def write(self, cube, local_keys=None, unlimited_dimensions=None, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', - least_significant_digit=None): + least_significant_digit=None, pack_dtype=None): """ Wrapper for saving cubes to a NetCDF file. Args: - * cube (:class:`iris.cube.Cube`): + * cube (:class:`iris.cube.Cube`): A :class:`iris.cube.Cube` to be saved to a netCDF file. Kwargs: @@ -855,6 +856,23 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, place in unpacked data that is a reliable value". Default is `None`, or no quantization, or 'lossless' compression. + * pack_dtype (type or string): + A numpy integer datatype (signed or unsigned) or a string that + describes a numpy integer dtype (e.g. 'i2', 'short', 'u4'). This + provides support for netCDF data packing as described in + http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values + If either `scale_factor` or `add_offset` is set in + `cube.attributes`, those values are used. If neither is set, + appropriate values of `scale_factor` and `add_offset` are + calculated based on `cube.data`, `pack_dtype` and possible masking + (see the link for details). Note that automatic calculation of + `scale_factor` and `add_offset` will trigger loading of lazy data; + set `scale_factor` and `add_offset` manually in `cube.attributes` + if you wish to avoid this. For masked data, fill values are taken + from `netCDF4.default_fillvals`. The default is `None`, in which + case the datatype is determined from the cube and no packing will + occur. + Returns: None.
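The packing arithmetic documented above follows the Unidata best-practices formulae: when the data is masked, one packed value is reserved for the fill value. Here is a minimal standalone sketch of that calculation and its round-trip error; the helper name compute_packing and the example data are illustrative only, not part of the patch:

import numpy as np
import numpy.ma as ma


def compute_packing(data, pack_dtype):
    # Mirrors the automatic calculation described in the docstring above:
    # for masked data the divisor is 2**n - 2 rather than 2**n - 1, to
    # reserve one packed value for the fill value.
    dt = np.dtype(pack_dtype)
    n = dt.itemsize * 8
    cmin, cmax = data.min(), data.max()
    masked = ma.isMaskedArray(data)
    scale_factor = (cmax - cmin) / (2**n - (2 if masked else 1))
    if dt.kind == 'u':
        add_offset = cmin
    else:
        # Signed: centre the offset for masked data, otherwise map the
        # packed minimum exactly onto cmin.
        if masked:
            add_offset = (cmax + cmin) / 2
        else:
            add_offset = cmin + 2**(n - 1) * scale_factor
    return scale_factor, add_offset


# Round-trip demonstration: pack floats into 16-bit signed integers.
data = np.linspace(250.0, 300.0, 1000)
sf, ao = compute_packing(data, 'i2')
packed = np.round((data - ao) / sf).astype('i2')
assert np.allclose(data, packed * sf + ao, atol=sf)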
@@ -878,6 +896,12 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, else: _no_unlim_dep_warning() + if pack_dtype and np.dtype(pack_dtype).kind not in ('u', 'i'): + msg = ('Argument pack_dtype must be None, a numpy integer ' + 'type or a string representation of a numpy integer ' + 'type.') + raise ValueError(msg) + cf_profile_available = (iris.site_configuration.get('cf_profile') not in [None, False]) if cf_profile_available: @@ -902,7 +926,8 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, cube, dimension_names, local_keys, zlib=zlib, complevel=complevel, shuffle=shuffle, fletcher32=fletcher32, contiguous=contiguous, chunksizes=chunksizes, endian=endian, - least_significant_digit=least_significant_digit) + least_significant_digit=least_significant_digit, + pack_dtype=pack_dtype) # Add coordinate variables. self._add_dim_coords(cube, dimension_names) @@ -1782,6 +1807,12 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, An iterable of cube attribute keys. Any cube attributes with matching keys will become attributes on the data variable. + * pack_dtype (numpy datatype or string): + A numpy integer datatype or a string that describes one. If not + provided, the variable keeps the datatype of the cube.data array + and no packing occurs. If provided, the data is packed using + `scale_factor` and `add_offset`, taken from `cube.attributes` + when set there, or calculated automatically otherwise. + All other keywords are passed through to the dataset's `createVariable` method. @@ -1793,78 +1824,136 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, while cf_name in self._dataset.variables: cf_name = self._increment_name(cf_name) + if 'pack_dtype' in kwargs: + datatype = kwargs.pop('pack_dtype') + else: + datatype = None + + def set_local_attrs(cf_var, local_keys): + """Attributes add_offset and/or scale_factor must be set + after netCDF variable creation but before data is saved + to the netCDF Dataset. + """ + if cube.standard_name: + cf_var.standard_name = cube.standard_name + + if cube.long_name: + cf_var.long_name = cube.long_name + + if cube.units != 'unknown': + cf_var.units = str(cube.units) + + # Add data variable-only attribute names to local_keys. + if local_keys is None: + local_keys = set() + else: + local_keys = set(local_keys) + local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS) + + # Add any cube attributes whose keys are in local_keys as + # CF-netCDF data variable attributes. + attr_names = set(cube.attributes).intersection(local_keys) + for attr_name in sorted(attr_names): + # Do not output 'conventions' attribute. + if attr_name.lower() == 'conventions': + continue + + value = cube.attributes[attr_name] + + if attr_name == 'STASH': + # Adopting provisional Metadata Conventions + # for representing MO Scientific Data + # encoded in NetCDF Format.
+ attr_name = 'um_stash_source' + value = str(value) + + if attr_name == "ukmo__process_flags": + value = " ".join([x.replace(" ", "_") for x in value]) + + if attr_name in _CF_GLOBAL_ATTRS: + msg = '{attr_name!r} is being added as a CF data variable ' \ 'attribute, but {attr_name!r} should only be a CF ' \ 'global attribute.'.format(attr_name=attr_name) + warnings.warn(msg) + + cf_var.setncattr(attr_name, value) + + # Calculate and set suitable scale_factor and add_offset if not + # already set. + if (datatype is not None and + 'scale_factor' not in cube.attributes and + 'add_offset' not in cube.attributes): + + dt = np.dtype(datatype) + cmax = cube.data.max() + cmin = cube.data.min() + n = dt.itemsize * 8 + masked = isinstance(cube.data, np.ma.core.MaskedArray) + if masked: + scale_factor = (cmax - cmin) / (2**n - 2) + else: + scale_factor = (cmax - cmin) / (2**n - 1) + if dt.kind == 'u': + add_offset = cmin + elif dt.kind == 'i': + if masked: + add_offset = (cmax + cmin) / 2 + else: + add_offset = cmin + 2**(n - 1) * scale_factor + cf_var.setncattr('add_offset', add_offset) + cf_var.setncattr('scale_factor', scale_factor) + # if netcdf3 avoid streaming due to dtype handling - if (not cube.has_lazy_data() - or self._dataset.file_format in ('NETCDF3_CLASSIC', - 'NETCDF3_64BIT')): + if (not cube.has_lazy_data() or + self._dataset.file_format in ('NETCDF3_CLASSIC', + 'NETCDF3_64BIT')): # Determine whether there is a cube MDI value. fill_value = None if isinstance(cube.data, ma.core.MaskedArray): - fill_value = cube.data.fill_value + if datatype is not None: + dtstr = np.dtype(datatype).str[1:] + fill_value = netCDF4.default_fillvals[dtstr] + else: + fill_value = cube.data.fill_value # Get the values in a form which is valid for the file format. data = self._ensure_valid_dtype(cube.data, 'cube', cube) + if datatype is None: + dtype = data.dtype.newbyteorder('=') + else: + dtype = datatype + # Create the cube CF-netCDF data variable with data payload. cf_var = self._dataset.createVariable( - cf_name, data.dtype.newbyteorder('='), dimension_names, + cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs) + + set_local_attrs(cf_var, local_keys) cf_var[:] = data else: + if datatype is None: + dtype = cube.lazy_data().dtype.newbyteorder('=') + fill_value = cube.lazy_data().fill_value + else: + dtype = datatype + dtstr = np.dtype(datatype).str[1:] + fill_value = netCDF4.default_fillvals[dtstr] + # Create the cube CF-netCDF data variable. # Explicitly assign the fill_value, which will be the type default # in the case of an unmasked array. cf_var = self._dataset.createVariable( - cf_name, cube.lazy_data().dtype.newbyteorder('='), - dimension_names, fill_value=cube.lazy_data().fill_value, + cf_name, dtype, + dimension_names, fill_value=fill_value, **kwargs) + set_local_attrs(cf_var, local_keys) # stream the data biggus.save([cube.lazy_data()], [cf_var], masked=True) - if cube.standard_name: - cf_var.standard_name = cube.standard_name - - if cube.long_name: - cf_var.long_name = cube.long_name - - if cube.units != 'unknown': - cf_var.units = str(cube.units) - - # Add data variable-only attribute names to local_keys. - if local_keys is None: - local_keys = set() - else: - local_keys = set(local_keys) - local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS) - - # Add any cube attributes whose keys are in local_keys as - # CF-netCDF data variable attributes.
- attr_names = set(cube.attributes).intersection(local_keys) - for attr_name in sorted(attr_names): - # Do not output 'conventions' attribute. - if attr_name.lower() == 'conventions': - continue - - value = cube.attributes[attr_name] - - if attr_name == 'STASH': - # Adopting provisional Metadata Conventions for representing MO - # Scientific Data encoded in NetCDF Format. - attr_name = 'um_stash_source' - value = str(value) - - if attr_name == "ukmo__process_flags": - value = " ".join([x.replace(" ", "_") for x in value]) - - if attr_name in _CF_GLOBAL_ATTRS: - msg = '{attr_name!r} is being added as CF data variable ' \ 'attribute, but {attr_name!r} should only be a CF ' \ 'global attribute.'.format(attr_name=attr_name) - warnings.warn(msg) - - cf_var.setncattr(attr_name, value) - # Create the CF-netCDF data variable cell method attribute. cell_methods = self._create_cf_cell_methods(cube, dimension_names) @@ -1907,7 +1996,7 @@ def _increment_name(self, varname): def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, unlimited_dimensions=None, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', - least_significant_digit=None): + least_significant_digit=None, pack_dtype=None): """ Save cube(s) to a netCDF file, given the cube and the filename. @@ -2004,6 +2093,25 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, in unpacked data that is a reliable value". Default is `None`, or no quantization, or 'lossless' compression. + * pack_dtype (type or string or list): + A numpy integer datatype (signed or unsigned) or a string that + describes a numpy integer dtype (e.g. 'i2', 'short', 'u4'). This + provides support for netCDF data packing as described in + http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values + If either `scale_factor` or `add_offset` is set in `cube.attributes`, + those values are used. If neither is set, appropriate values of + `scale_factor` and `add_offset` are calculated based on `cube.data`, + `pack_dtype` and possible masking (see the link for details). Note + that automatic calculation of `scale_factor` and `add_offset` will + trigger loading of lazy data; set `scale_factor` and `add_offset` + manually in `cube.attributes` if you wish to avoid this. For + masked data, fill values are taken from `netCDF4.default_fillvals`. + If `cube` is an iterable of cubes and a different datatype is + desired for each cube, `pack_dtype` can instead be a list with + one element per cube. The default is `None`, in which + case the datatype is determined from the cube and no packing will + occur. + Returns: None. @@ -2062,9 +2170,19 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, with Saver(filename, netcdf_format) as sman: + if isinstance(pack_dtype, list): + # Work on a copy so that the caller's list is not consumed + # by the pop() calls below. + pack_dtype = list(pack_dtype) # Iterate through the cubelist.
for cube in cubes: + if isinstance(pack_dtype, list): + if len(pack_dtype) > 0: + dtype = pack_dtype.pop(0) + else: + msg = ('If pack_dtype is a list, it must have the ' + 'same number of elements as the argument to ' + 'cube.') + raise ValueError(msg) + else: + dtype = pack_dtype sman.write(cube, local_keys, unlimited_dimensions, zlib, complevel, shuffle, fletcher32, contiguous, chunksizes, endian, - least_significant_digit) + least_significant_digit, pack_dtype=dtype) conventions = CF_CONVENTIONS_VERSION diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/multi_packed_multi_dtype.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/multi_packed_multi_dtype.cdl new file mode 100644 index 0000000000..9b93c01910 --- /dev/null +++ b/lib/iris/tests/results/unit/fileformats/netcdf/save/multi_packed_multi_dtype.cdl @@ -0,0 +1,68 @@ +dimensions: + time = UNLIMITED ; // (360 currently) + bnds = 2 ; + latitude = 73 ; + longitude = 96 ; +variables: + float air_temperature(time, latitude, longitude) ; + air_temperature:scale_factor = 0.0024257504786326 ; + air_temperature:add_offset = 261.648002426021 ; + air_temperature:standard_name = "air_temperature" ; + air_temperature:units = "K" ; + air_temperature:um_stash_source = "m01s03i236" ; + air_temperature:cell_methods = "time: maximum (interval: 1 hour)" ; + air_temperature:grid_mapping = "latitude_longitude" ; + air_temperature:coordinates = "forecast_period forecast_reference_time height" ; + int latitude_longitude ; + latitude_longitude:grid_mapping_name = "latitude_longitude" ; + latitude_longitude:longitude_of_prime_meridian = 0. ; + latitude_longitude:earth_radius = 6371229. ; + double time(time) ; + time:axis = "T" ; + time:bounds = "time_bnds" ; + time:units = "hours since 1970-01-01 00:00:00" ; + time:standard_name = "time" ; + time:calendar = "360_day" ; + double time_bnds(time, bnds) ; + float latitude(latitude) ; + latitude:axis = "Y" ; + latitude:units = "degrees_north" ; + latitude:standard_name = "latitude" ; + float longitude(longitude) ; + longitude:axis = "X" ; + longitude:units = "degrees_east" ; + longitude:standard_name = "longitude" ; + double forecast_period(time) ; + forecast_period:bounds = "forecast_period_bnds" ; + forecast_period:units = "hours" ; + forecast_period:standard_name = "forecast_period" ; + double forecast_period_bnds(time, bnds) ; + double forecast_reference_time ; + forecast_reference_time:units = "hours since 1970-01-01 00:00:00" ; + forecast_reference_time:standard_name = "forecast_reference_time" ; + forecast_reference_time:calendar = "360_day" ; + double height ; + height:units = "m" ; + height:standard_name = "height" ; + height:positive = "up" ; + float precipitation_flux(time, latitude, longitude) ; + precipitation_flux:standard_name = "precipitation_flux" ; + precipitation_flux:units = "kg m-2 s-1" ; + precipitation_flux:um_stash_source = "m01s05i216" ; + precipitation_flux:cell_methods = "time: mean (interval: 1 hour)" ; + precipitation_flux:grid_mapping = "latitude_longitude" ; + precipitation_flux:coordinates = "forecast_period forecast_reference_time" ; + float air_temperature_0(time, latitude, longitude) ; + air_temperature_0:scale_factor = 0.0020141666756075 ; + air_temperature_0:add_offset = 176.7872f ; + air_temperature_0:standard_name = "air_temperature" ; + air_temperature_0:units = "K" ; + air_temperature_0:um_stash_source = "m01s03i236" ; + air_temperature_0:cell_methods = "time: minimum (interval: 1 hour)" ; + air_temperature_0:grid_mapping = "latitude_longitude" ;
air_temperature_0:coordinates = "forecast_period forecast_reference_time height" ; + +// global attributes: + :source = "Data from Met Office Unified Model" ; + :Conventions = "CF-1.5" ; +} diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/multi_packed_single_dtype.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/multi_packed_single_dtype.cdl new file mode 100644 index 0000000000..a160be3287 --- /dev/null +++ b/lib/iris/tests/results/unit/fileformats/netcdf/save/multi_packed_single_dtype.cdl @@ -0,0 +1,70 @@ +dimensions: + time = UNLIMITED ; // (360 currently) + bnds = 2 ; + latitude = 73 ; + longitude = 96 ; +variables: + float air_temperature(time, latitude, longitude) ; + air_temperature:scale_factor = 0.0024257504786326 ; + air_temperature:add_offset = 261.648002426021 ; + air_temperature:standard_name = "air_temperature" ; + air_temperature:units = "K" ; + air_temperature:um_stash_source = "m01s03i236" ; + air_temperature:cell_methods = "time: maximum (interval: 1 hour)" ; + air_temperature:grid_mapping = "latitude_longitude" ; + air_temperature:coordinates = "forecast_period forecast_reference_time height" ; + int latitude_longitude ; + latitude_longitude:grid_mapping_name = "latitude_longitude" ; + latitude_longitude:longitude_of_prime_meridian = 0. ; + latitude_longitude:earth_radius = 6371229. ; + double time(time) ; + time:axis = "T" ; + time:bounds = "time_bnds" ; + time:units = "hours since 1970-01-01 00:00:00" ; + time:standard_name = "time" ; + time:calendar = "360_day" ; + double time_bnds(time, bnds) ; + float latitude(latitude) ; + latitude:axis = "Y" ; + latitude:units = "degrees_north" ; + latitude:standard_name = "latitude" ; + float longitude(longitude) ; + longitude:axis = "X" ; + longitude:units = "degrees_east" ; + longitude:standard_name = "longitude" ; + double forecast_period(time) ; + forecast_period:bounds = "forecast_period_bnds" ; + forecast_period:units = "hours" ; + forecast_period:standard_name = "forecast_period" ; + double forecast_period_bnds(time, bnds) ; + double forecast_reference_time ; + forecast_reference_time:units = "hours since 1970-01-01 00:00:00" ; + forecast_reference_time:standard_name = "forecast_reference_time" ; + forecast_reference_time:calendar = "360_day" ; + double height ; + height:units = "m" ; + height:standard_name = "height" ; + height:positive = "up" ; + float precipitation_flux(time, latitude, longitude) ; + precipitation_flux:scale_factor = 2.9897383798121e-08 ; + precipitation_flux:add_offset = 0.000979677472296829 ; + precipitation_flux:standard_name = "precipitation_flux" ; + precipitation_flux:units = "kg m-2 s-1" ; + precipitation_flux:um_stash_source = "m01s05i216" ; + precipitation_flux:cell_methods = "time: mean (interval: 1 hour)" ; + precipitation_flux:grid_mapping = "latitude_longitude" ; + precipitation_flux:coordinates = "forecast_period forecast_reference_time" ; + float air_temperature_0(time, latitude, longitude) ; + air_temperature_0:scale_factor = 0.0020141666756075 ; + air_temperature_0:add_offset = 242.787445071619 ; + air_temperature_0:standard_name = "air_temperature" ; + air_temperature_0:units = "K" ; + air_temperature_0:um_stash_source = "m01s03i236" ; + air_temperature_0:cell_methods = "time: minimum (interval: 1 hour)" ; + air_temperature_0:grid_mapping = "latitude_longitude" ; + air_temperature_0:coordinates = "forecast_period forecast_reference_time height" ; + +// global attributes: + :source = "Data from Met Office Unified Model" ; + :Conventions = "CF-1.5" ; +} diff 
--git a/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_manual.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_manual.cdl new file mode 100644 index 0000000000..3710d61487 --- /dev/null +++ b/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_manual.cdl @@ -0,0 +1,50 @@ +dimensions: + latitude = UNLIMITED ; // (73 currently) + bnds = 2 ; + longitude = 96 ; +variables: + float air_temperature(latitude, longitude) ; + air_temperature:scale_factor = 0.00119806791576066 ; + air_temperature:add_offset = 267.40062344802 ; + air_temperature:standard_name = "air_temperature" ; + air_temperature:units = "K" ; + air_temperature:um_stash_source = "m01s03i236" ; + air_temperature:cell_methods = "time: mean (interval: 6 hour)" ; + air_temperature:grid_mapping = "latitude_longitude" ; + air_temperature:coordinates = "forecast_period forecast_reference_time height time" ; + int latitude_longitude ; + latitude_longitude:grid_mapping_name = "latitude_longitude" ; + latitude_longitude:longitude_of_prime_meridian = 0. ; + latitude_longitude:earth_radius = 6371229. ; + float latitude(latitude) ; + latitude:axis = "Y" ; + latitude:units = "degrees_north" ; + latitude:standard_name = "latitude" ; + float longitude(longitude) ; + longitude:axis = "X" ; + longitude:units = "degrees_east" ; + longitude:standard_name = "longitude" ; + double forecast_period ; + forecast_period:bounds = "forecast_period_bnds" ; + forecast_period:units = "hours" ; + forecast_period:standard_name = "forecast_period" ; + double forecast_period_bnds(bnds) ; + double forecast_reference_time ; + forecast_reference_time:units = "hours since 1970-01-01 00:00:00" ; + forecast_reference_time:standard_name = "forecast_reference_time" ; + forecast_reference_time:calendar = "gregorian" ; + double height ; + height:units = "m" ; + height:standard_name = "height" ; + height:positive = "up" ; + double time ; + time:bounds = "time_bnds" ; + time:units = "hours since 1970-01-01 00:00:00" ; + time:standard_name = "time" ; + time:calendar = "gregorian" ; + double time_bnds(bnds) ; + +// global attributes: + :source = "Data from Met Office Unified Model" ; + :Conventions = "CF-1.5" ; +} diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_signed.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_signed.cdl new file mode 100644 index 0000000000..3710d61487 --- /dev/null +++ b/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_signed.cdl @@ -0,0 +1,50 @@ +dimensions: + latitude = UNLIMITED ; // (73 currently) + bnds = 2 ; + longitude = 96 ; +variables: + float air_temperature(latitude, longitude) ; + air_temperature:scale_factor = 0.00119806791576066 ; + air_temperature:add_offset = 267.40062344802 ; + air_temperature:standard_name = "air_temperature" ; + air_temperature:units = "K" ; + air_temperature:um_stash_source = "m01s03i236" ; + air_temperature:cell_methods = "time: mean (interval: 6 hour)" ; + air_temperature:grid_mapping = "latitude_longitude" ; + air_temperature:coordinates = "forecast_period forecast_reference_time height time" ; + int latitude_longitude ; + latitude_longitude:grid_mapping_name = "latitude_longitude" ; + latitude_longitude:longitude_of_prime_meridian = 0. ; + latitude_longitude:earth_radius = 6371229. 
; + float latitude(latitude) ; + latitude:axis = "Y" ; + latitude:units = "degrees_north" ; + latitude:standard_name = "latitude" ; + float longitude(longitude) ; + longitude:axis = "X" ; + longitude:units = "degrees_east" ; + longitude:standard_name = "longitude" ; + double forecast_period ; + forecast_period:bounds = "forecast_period_bnds" ; + forecast_period:units = "hours" ; + forecast_period:standard_name = "forecast_period" ; + double forecast_period_bnds(bnds) ; + double forecast_reference_time ; + forecast_reference_time:units = "hours since 1970-01-01 00:00:00" ; + forecast_reference_time:standard_name = "forecast_reference_time" ; + forecast_reference_time:calendar = "gregorian" ; + double height ; + height:units = "m" ; + height:standard_name = "height" ; + height:positive = "up" ; + double time ; + time:bounds = "time_bnds" ; + time:units = "hours since 1970-01-01 00:00:00" ; + time:standard_name = "time" ; + time:calendar = "gregorian" ; + double time_bnds(bnds) ; + +// global attributes: + :source = "Data from Met Office Unified Model" ; + :Conventions = "CF-1.5" ; +} diff --git a/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_unsigned.cdl b/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_unsigned.cdl new file mode 100644 index 0000000000..65d5c61d63 --- /dev/null +++ b/lib/iris/tests/results/unit/fileformats/netcdf/save/single_packed_unsigned.cdl @@ -0,0 +1,50 @@ +dimensions: + latitude = UNLIMITED ; // (73 currently) + bnds = 2 ; + longitude = 96 ; +variables: + float air_temperature(latitude, longitude) ; + air_temperature:scale_factor = 0.30790345435049 ; + air_temperature:add_offset = 228.1423f ; + air_temperature:standard_name = "air_temperature" ; + air_temperature:units = "K" ; + air_temperature:um_stash_source = "m01s03i236" ; + air_temperature:cell_methods = "time: mean (interval: 6 hour)" ; + air_temperature:grid_mapping = "latitude_longitude" ; + air_temperature:coordinates = "forecast_period forecast_reference_time height time" ; + int latitude_longitude ; + latitude_longitude:grid_mapping_name = "latitude_longitude" ; + latitude_longitude:longitude_of_prime_meridian = 0. ; + latitude_longitude:earth_radius = 6371229. 
; + float latitude(latitude) ; + latitude:axis = "Y" ; + latitude:units = "degrees_north" ; + latitude:standard_name = "latitude" ; + float longitude(longitude) ; + longitude:axis = "X" ; + longitude:units = "degrees_east" ; + longitude:standard_name = "longitude" ; + double forecast_period ; + forecast_period:bounds = "forecast_period_bnds" ; + forecast_period:units = "hours" ; + forecast_period:standard_name = "forecast_period" ; + double forecast_period_bnds(bnds) ; + double forecast_reference_time ; + forecast_reference_time:units = "hours since 1970-01-01 00:00:00" ; + forecast_reference_time:standard_name = "forecast_reference_time" ; + forecast_reference_time:calendar = "gregorian" ; + double height ; + height:units = "m" ; + height:standard_name = "height" ; + height:positive = "up" ; + double time ; + time:bounds = "time_bnds" ; + time:units = "hours since 1970-01-01 00:00:00" ; + time:standard_name = "time" ; + time:calendar = "gregorian" ; + double time_bnds(bnds) ; + +// global attributes: + :source = "Data from Met Office Unified Model" ; + :Conventions = "CF-1.5" ; +} diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_save.py b/lib/iris/tests/unit/fileformats/netcdf/test_save.py index b1b76f56ce..a7b149b869 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_save.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_save.py @@ -96,5 +96,156 @@ def test_no_unlimited_future_default(self): self.assertFalse(ds.dimensions['latitude'].isunlimited()) +@tests.skip_data +class Test_packed_data(tests.IrisTest): + def _get_scale_factor_add_offset(self, cube, datatype): + """Mirror the saver's automatic packing calculation.""" + dt = np.dtype(datatype) + cmax = cube.data.max() + cmin = cube.data.min() + n = dt.itemsize * 8 + masked = isinstance(cube.data, np.ma.core.MaskedArray) + if masked: + scale_factor = (cmax - cmin) / (2**n - 2) + else: + scale_factor = (cmax - cmin) / (2**n - 1) + if dt.kind == 'u': + add_offset = cmin + elif dt.kind == 'i': + if masked: + add_offset = (cmax + cmin) / 2 + else: + add_offset = cmin + 2**(n - 1) * scale_factor + return (scale_factor, add_offset) + + def test_single_packed_signed(self): + """Test saving a single CF-netCDF file with packing.""" + # Read PP input file. + file_in = tests.get_data_path( + ('PP', 'cf_processing', + '000003000000.03.236.000128.1990.12.01.00.00.b.pp')) + cube = iris.load_cube(file_in) + datatype = 'i2' + scale_factor, offset = self._get_scale_factor_add_offset(cube, + datatype) + # Write Cube to netCDF file. + with self.temp_filename(suffix='.nc') as file_out: + iris.save(cube, file_out, pack_dtype=datatype) + decimal = int(-np.log10(scale_factor)) + packedcube = iris.load_cube(file_out) + # Check that packed cube is accurate to expected precision + self.assertArrayAlmostEqual(cube.data, packedcube.data, + decimal=decimal) + # Check the netCDF file against CDL expected output. + self.assertCDL(file_out, ('unit', 'fileformats', 'netcdf', + 'save', + 'single_packed_signed.cdl')) + + def test_single_packed_unsigned(self): + """Test saving a single CF-netCDF file with packing into unsigned.""" + # Read PP input file. + file_in = tests.get_data_path( + ('PP', 'cf_processing', + '000003000000.03.236.000128.1990.12.01.00.00.b.pp')) + cube = iris.load_cube(file_in) + datatype = 'u1' + scale_factor, offset = self._get_scale_factor_add_offset(cube, + datatype) + # Write Cube to netCDF file.
+ with self.temp_filename(suffix='.nc') as file_out: + iris.save(cube, file_out, pack_dtype=datatype) + decimal = int(-np.log10(scale_factor)) + packedcube = iris.load_cube(file_out) + # Check that packed cube is accurate to expected precision + self.assertArrayAlmostEqual(cube.data, packedcube.data, + decimal=decimal) + # Check the netCDF file against CDL expected output. + self.assertCDL(file_out, ('unit', 'fileformats', 'netcdf', + 'save', + 'single_packed_unsigned.cdl')) + + def test_single_packed_manual_scale(self): + """Test saving a single CF-netCDF file with packing, with + scale_factor and add_offset set manually.""" + file_in = tests.get_data_path( + ('PP', 'cf_processing', + '000003000000.03.236.000128.1990.12.01.00.00.b.pp')) + cube = iris.load_cube(file_in) + datatype = 'i2' + scale_factor, offset = self._get_scale_factor_add_offset(cube, + datatype) + cube.attributes['scale_factor'] = scale_factor + cube.attributes['add_offset'] = offset + + # Write Cube to netCDF file. + with self.temp_filename(suffix='.nc') as file_out: + iris.save(cube, file_out, pack_dtype=datatype) + decimal = int(-np.log10(scale_factor)) + packedcube = iris.load_cube(file_out) + # Check the netCDF file against CDL expected output. + self.assertCDL(file_out, ('unit', 'fileformats', 'netcdf', + 'save', + 'single_packed_manual.cdl')) + # Check that packed cube is accurate to expected precision. + self.assertArrayAlmostEqual(cube.data, packedcube.data, + decimal=decimal) + + def test_multi_packed_single_dtype(self): + """Test saving multiple packed cubes with the same pack_dtype.""" + # Read PP input file. + file_in = tests.get_data_path(('PP', 'cf_processing', + 'abcza_pa19591997_daily_29.b.pp')) + cubes = iris.load(file_in) + dtype = 'i2' + # Write Cubes to netCDF file. + with self.temp_filename(suffix='.nc') as file_out: + iris.save(cubes, file_out, pack_dtype=dtype) + # Check the netCDF file against CDL expected output. + self.assertCDL(file_out, ('unit', 'fileformats', 'netcdf', + 'save', + 'multi_packed_single_dtype.cdl')) + packedcubes = iris.load(file_out) + # ensure cube order is the same: + cubes.sort(key=lambda cube: cube.cell_methods[0].method) + packedcubes.sort(key=lambda cube: cube.cell_methods[0].method) + for cube, packedcube in zip(cubes, packedcubes): + sf, ao = self._get_scale_factor_add_offset(cube, dtype) + decimal = int(-np.log10(sf)) + # Check that packed cube is accurate to expected precision + self.assertArrayAlmostEqual(cube.data, packedcube.data, + decimal=decimal) + + def test_multi_packed_multi_dtype(self): + """Test saving multiple packed cubes with pack_dtype list.""" + # Read PP input file. + file_in = tests.get_data_path(('PP', 'cf_processing', + 'abcza_pa19591997_daily_29.b.pp')) + cubes = iris.load(file_in) + dtypes = ['i2', None, 'u2'] + # Write Cubes to netCDF file. + with self.temp_filename(suffix='.nc') as file_out: + iris.save(cubes, file_out, pack_dtype=dtypes) + # Check the netCDF file against CDL expected output.
+ self.assertCDL(file_out, ('unit', 'fileformats', 'netcdf', + 'save', + 'multi_packed_multi_dtype.cdl')) + packedcubes = iris.load(file_out) + # ensure cube order is the same: + cubes.sort(key=lambda cube: cube.cell_methods[0].method) + packedcubes.sort(key=lambda cube: cube.cell_methods[0].method) + for cube, packedcube, dtype in zip(cubes, packedcubes, dtypes): + if dtype: + sf, ao = self._get_scale_factor_add_offset(cube, dtype) + decimal = int(-np.log10(sf)) + # Check that packed cube is accurate to expected precision + self.assertArrayAlmostEqual(cube.data, packedcube.data, + decimal=decimal) + else: + self.assertArrayEqual(cube.data, packedcube.data) + + if __name__ == "__main__": tests.main()
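For reference, a usage sketch of the new keyword; this is illustrative only (the input and output file names are placeholders, and the manual scale_factor/add_offset values must be chosen so the packed values fit the target dtype):

import iris

# Automatic packing: scale_factor and add_offset are calculated from
# cube.data (this realises lazy data).
cube = iris.load_cube('input.pp')  # illustrative path
iris.save(cube, 'packed_auto.nc', pack_dtype='i2')

# Manual packing: pre-set attributes are used as-is, so lazy data is
# not loaded. The values below are placeholders; ensure that
# (data - add_offset) / scale_factor fits into the packed dtype.
cube.attributes['scale_factor'] = 0.001
cube.attributes['add_offset'] = 270.0
iris.save(cube, 'packed_manual.nc', pack_dtype='i2')

# Per-cube packing for a CubeList: one entry per cube, with None
# meaning "do not pack this cube".
cubes = iris.load('multi_field.pp')  # illustrative path
iris.save(cubes, 'packed_multi.nc', pack_dtype=['i2', None, 'u2'])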