From a1aa155cf9141b0156c6ea9cbf8dae06b0e016a3 Mon Sep 17 00:00:00 2001 From: pelson Date: Tue, 4 Oct 2016 16:28:40 +0100 Subject: [PATCH 1/2] Heuristically determine, when saving a NetCDF attribute, whether it should be a byte string or unicode. --- lib/iris/fileformats/netcdf.py | 64 +++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 6e1d4d224d..49dd4c3951 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -705,6 +705,31 @@ def load_cubes(filenames, callback=None): yield cube +def bytes_if_ascii(string): + """ + Convert the given string to a byte string (str in py2k, bytes in py3k) + iff the given string can be encoded to ascii, else maintain the type + of the inputted string. + + """ + if isinstance(string, six.string_types): + try: + return string.encode(encoding='ascii') + except (AttributeError, UnicodeEncodeError): + pass + return string + + +def setncattr(variable, name, attribute): + """ + Put the given attribute on the given netCDF4 Data type, casting + attributes as we go to bytes rather than unicode. 
+ + """ + attribute = bytes_if_ascii(attribute) + return variable.setncattr(name, attribute) + + class Saver(object): """A manager for saving netcdf files.""" @@ -990,10 +1015,10 @@ def update_global_attributes(self, attributes=None, **kwargs): attributes = dict(attributes) for attr_name in sorted(attributes): - self._dataset.setncattr(attr_name, attributes[attr_name]) + setncattr(self._dataset, attr_name, attributes[attr_name]) for attr_name in sorted(kwargs): - self._dataset.setncattr(attr_name, kwargs[attr_name]) + setncattr(self._dataset, attr_name, kwargs[attr_name]) def _create_cf_dimensions(self, cube, dimension_names, unlimited_dimensions=None): @@ -1196,12 +1221,12 @@ def _add_aux_factories(self, cube, cf_var_cube, dimension_names): dimension_names, primary_coord) cf_var = self._dataset.variables[name] - cf_var.standard_name = std_name + setncattr(cf_var, 'standard_name', std_name) cf_var.axis = 'Z' # Update the formula terms. ft = formula_terms.split() ft = [name if t == cf_name else t for t in ft] - cf_var.formula_terms = ' '.join(ft) + setncattr(cf_var, 'formula_terms', ' '.join(ft)) # Update the cache. self._formula_terms_cache[key] = name # Update the associated cube variable. 
@@ -1209,9 +1234,9 @@ def _add_aux_factories(self, cube, cf_var_cube, dimension_names): coords = [name if c == cf_name else c for c in coords] cf_var_cube.coordinates = ' '.join(coords) else: - cf_var.standard_name = std_name + setncattr(cf_var, 'standard_name', std_name) cf_var.axis = 'Z' - cf_var.formula_terms = formula_terms + setncattr(cf_var, 'formula_terms', formula_terms) def _get_dim_names(self, cube): """ @@ -1299,7 +1324,7 @@ def _cf_coord_identity(self, coord): elif coord.standard_name == "longitude": units = 'degrees_east' - return coord.standard_name, coord.long_name, units + return (coord.standard_name, coord.long_name, units) def _ensure_valid_dtype(self, values, src_name, src_object): # NetCDF3 does not support int64 or unsigned ints, so we check @@ -1411,7 +1436,6 @@ def _get_coord_variable_name(self, cube, coord): name = 'unknown_scalar' # Convert to lower case and replace whitespace by underscores. cf_name = '_'.join(name.lower().split()) - return cf_name def _create_cf_cell_measure_variable(self, cube, dimension_names, @@ -1456,10 +1480,10 @@ def _create_cf_cell_measure_variable(self, cube, dimension_names, cf_var.units = str(cell_measure.units) if cell_measure.standard_name is not None: - cf_var.standard_name = cell_measure.standard_name + setncattr(cf_var, 'standard_name', cell_measure.standard_name) if cell_measure.long_name is not None: - cf_var.long_name = cell_measure.long_name + setncattr(cf_var, 'long_name', cell_measure.long_name) # Add any other custom coordinate attributes. for name in sorted(cell_measure.attributes): @@ -1467,7 +1491,7 @@ def _create_cf_cell_measure_variable(self, cube, dimension_names, # Don't clobber existing attributes. 
if not hasattr(cf_var, name): - cf_var.setncattr(name, value) + setncattr(cf_var, name, value) return cf_name @@ -1564,14 +1588,14 @@ def _create_cf_variable(self, cube, dimension_names, coord): cf_var.units = units if standard_name is not None: - cf_var.standard_name = standard_name + setncattr(cf_var, 'standard_name', standard_name) if long_name is not None: - cf_var.long_name = long_name + setncattr(cf_var, 'long_name', long_name) # Add the CF-netCDF calendar attribute. if coord.units.calendar: - cf_var.calendar = coord.units.calendar + setncattr(cf_var, 'calendar', coord.units.calendar) # Add any other custom coordinate attributes. for name in sorted(coord.attributes): @@ -1585,7 +1609,7 @@ def _create_cf_variable(self, cube, dimension_names, coord): # Don't clobber existing attributes. if not hasattr(cf_var, name): - cf_var.setncattr(name, value) + setncattr(cf_var, name, value) return cf_name @@ -1823,13 +1847,13 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, biggus.save([cube.lazy_data()], [cf_var], masked=True) if cube.standard_name: - cf_var.standard_name = cube.standard_name + setncattr(cf_var, 'standard_name', cube.standard_name) if cube.long_name: - cf_var.long_name = cube.long_name + setncattr(cf_var, 'long_name', cube.long_name) if cube.units != 'unknown': - cf_var.units = str(cube.units) + setncattr(cf_var, 'units', str(cube.units)) # Add data variable-only attribute names to local_keys. if local_keys is None: @@ -1863,13 +1887,13 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, 'global attribute.'.format(attr_name=attr_name) warnings.warn(msg) - cf_var.setncattr(attr_name, value) + setncattr(cf_var, attr_name, value) # Create the CF-netCDF data variable cell method attribute. cell_methods = self._create_cf_cell_methods(cube, dimension_names) if cell_methods: - cf_var.cell_methods = cell_methods + setncattr(cf_var, 'cell_methods', cell_methods) # Create the CF-netCDF grid mapping. 
self._create_cf_grid_mapping(cube, cf_var) From 1e1f59a92ab454e9030c71f8d16a161ba04f0058 Mon Sep 17 00:00:00 2001 From: pelson Date: Fri, 7 Oct 2016 09:11:02 +0100 Subject: [PATCH 2/2] Added a what's new, and made the NC_CHAR functions private. --- ...gfix_2016-Oct-06_netcdf_string_unicode.txt | 5 +++ lib/iris/fileformats/netcdf.py | 45 ++++++++++--------- 2 files changed, 29 insertions(+), 21 deletions(-) create mode 100644 docs/iris/src/whatsnew/contributions_1.11/bugfix_2016-Oct-06_netcdf_string_unicode.txt diff --git a/docs/iris/src/whatsnew/contributions_1.11/bugfix_2016-Oct-06_netcdf_string_unicode.txt b/docs/iris/src/whatsnew/contributions_1.11/bugfix_2016-Oct-06_netcdf_string_unicode.txt new file mode 100644 index 0000000000..c028afcefe --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_1.11/bugfix_2016-Oct-06_netcdf_string_unicode.txt @@ -0,0 +1,5 @@ +* When saving to NetCDF, the existing behaviour of writing string attributes as ASCII has been + maintained across all versions of netCDF4-python. Where an attribute cannot be encoded as ASCII + it will be passed through to the underlying netCDF4-python as unicode, and the behaviour will be + netCDF4-python version dependent (since v1.2, unicode attributes are written as NC_STRING rather than NC_CHAR type). + https://github.com/SciTools/iris/pull/2158 refers. diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 49dd4c3951..62279ba20d 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -705,12 +705,15 @@ def load_cubes(filenames, callback=None): yield cube -def bytes_if_ascii(string): +def _bytes_if_ascii(string): """ Convert the given string to a byte string (str in py2k, bytes in py3k) iff the given string can be encoded to ascii, else maintain the type of the inputted string. + Note: objects without an `encode` method (such as None) will + be returned by the function unchanged. 
+ """ if isinstance(string, six.string_types): try: @@ -720,13 +723,13 @@ def bytes_if_ascii(string): return string -def setncattr(variable, name, attribute): +def _setncattr(variable, name, attribute): """ Put the given attribute on the given netCDF4 Data type, casting attributes as we go to bytes rather than unicode. """ - attribute = bytes_if_ascii(attribute) + attribute = _bytes_if_ascii(attribute) return variable.setncattr(name, attribute) @@ -1015,10 +1018,10 @@ def update_global_attributes(self, attributes=None, **kwargs): attributes = dict(attributes) for attr_name in sorted(attributes): - setncattr(self._dataset, attr_name, attributes[attr_name]) + _setncattr(self._dataset, attr_name, attributes[attr_name]) for attr_name in sorted(kwargs): - setncattr(self._dataset, attr_name, kwargs[attr_name]) + _setncattr(self._dataset, attr_name, kwargs[attr_name]) def _create_cf_dimensions(self, cube, dimension_names, unlimited_dimensions=None): @@ -1221,12 +1224,12 @@ def _add_aux_factories(self, cube, cf_var_cube, dimension_names): dimension_names, primary_coord) cf_var = self._dataset.variables[name] - setncattr(cf_var, 'standard_name', std_name) + _setncattr(cf_var, 'standard_name', std_name) cf_var.axis = 'Z' # Update the formula terms. ft = formula_terms.split() ft = [name if t == cf_name else t for t in ft] - setncattr(cf_var, 'formula_terms', ' '.join(ft)) + _setncattr(cf_var, 'formula_terms', ' '.join(ft)) # Update the cache. self._formula_terms_cache[key] = name # Update the associated cube variable. 
@@ -1234,9 +1237,9 @@ def _add_aux_factories(self, cube, cf_var_cube, dimension_names): coords = [name if c == cf_name else c for c in coords] cf_var_cube.coordinates = ' '.join(coords) else: - setncattr(cf_var, 'standard_name', std_name) + _setncattr(cf_var, 'standard_name', std_name) cf_var.axis = 'Z' - setncattr(cf_var, 'formula_terms', formula_terms) + _setncattr(cf_var, 'formula_terms', formula_terms) def _get_dim_names(self, cube): """ @@ -1480,10 +1483,10 @@ def _create_cf_cell_measure_variable(self, cube, dimension_names, cf_var.units = str(cell_measure.units) if cell_measure.standard_name is not None: - setncattr(cf_var, 'standard_name', cell_measure.standard_name) + _setncattr(cf_var, 'standard_name', cell_measure.standard_name) if cell_measure.long_name is not None: - setncattr(cf_var, 'long_name', cell_measure.long_name) + _setncattr(cf_var, 'long_name', cell_measure.long_name) # Add any other custom coordinate attributes. for name in sorted(cell_measure.attributes): @@ -1491,7 +1494,7 @@ def _create_cf_cell_measure_variable(self, cube, dimension_names, # Don't clobber existing attributes. if not hasattr(cf_var, name): - setncattr(cf_var, name, value) + _setncattr(cf_var, name, value) return cf_name @@ -1588,14 +1591,14 @@ def _create_cf_variable(self, cube, dimension_names, coord): cf_var.units = units if standard_name is not None: - setncattr(cf_var, 'standard_name', standard_name) + _setncattr(cf_var, 'standard_name', standard_name) if long_name is not None: - setncattr(cf_var, 'long_name', long_name) + _setncattr(cf_var, 'long_name', long_name) # Add the CF-netCDF calendar attribute. if coord.units.calendar: - setncattr(cf_var, 'calendar', coord.units.calendar) + _setncattr(cf_var, 'calendar', coord.units.calendar) # Add any other custom coordinate attributes. for name in sorted(coord.attributes): @@ -1609,7 +1612,7 @@ def _create_cf_variable(self, cube, dimension_names, coord): # Don't clobber existing attributes. 
if not hasattr(cf_var, name): - setncattr(cf_var, name, value) + _setncattr(cf_var, name, value) return cf_name @@ -1847,13 +1850,13 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, biggus.save([cube.lazy_data()], [cf_var], masked=True) if cube.standard_name: - setncattr(cf_var, 'standard_name', cube.standard_name) + _setncattr(cf_var, 'standard_name', cube.standard_name) if cube.long_name: - setncattr(cf_var, 'long_name', cube.long_name) + _setncattr(cf_var, 'long_name', cube.long_name) if cube.units != 'unknown': - setncattr(cf_var, 'units', str(cube.units)) + _setncattr(cf_var, 'units', str(cube.units)) # Add data variable-only attribute names to local_keys. if local_keys is None: @@ -1887,13 +1890,13 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, 'global attribute.'.format(attr_name=attr_name) warnings.warn(msg) - setncattr(cf_var, attr_name, value) + _setncattr(cf_var, attr_name, value) # Create the CF-netCDF data variable cell method attribute. cell_methods = self._create_cf_cell_methods(cube, dimension_names) if cell_methods: - setncattr(cf_var, 'cell_methods', cell_methods) + _setncattr(cf_var, 'cell_methods', cell_methods) # Create the CF-netCDF grid mapping. self._create_cf_grid_mapping(cube, cf_var)