Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ Bug fixes

- Fix h5netcdf saving scalars with filters or chunks (:issue:`2563`).
By `Martin Raspaud <https://github.com/mraspaud>`_.
- Fix parsing of ``_Unsigned`` attribute set by OPENDAP servers (:issue:`2583`).
  By `Deepak Cherian <https://github.com/dcherian>`_.


.. _whats-new.0.11.0:

Expand Down
8 changes: 6 additions & 2 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,11 @@ class UnsignedIntegerCoder(VariableCoder):
def encode(self, variable, name=None):
dims, data, attrs, encoding = unpack_for_encoding(variable)

if encoding.get('_Unsigned', False):
# from netCDF best practices
# https://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html
# "_Unsigned = "true" to indicate that
# integer data should be treated as unsigned"
if encoding.get('_Unsigned', 'false') == 'true':
pop_to(encoding, attrs, '_Unsigned')
signed_dtype = np.dtype('i%s' % data.dtype.itemsize)
if '_FillValue' in attrs:
Expand All @@ -266,7 +270,7 @@ def decode(self, variable, name=None):
unsigned = pop_to(attrs, encoding, '_Unsigned')

if data.dtype.kind == 'i':
if unsigned:
if unsigned == 'true':
unsigned_dtype = np.dtype('u%s' % data.dtype.itemsize)
transform = partial(np.asarray, dtype=unsigned_dtype)
data = lazy_elemwise_func(data, transform, unsigned_dtype)
Expand Down
97 changes: 59 additions & 38 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def create_masked_and_scaled_data():
def create_encoded_masked_and_scaled_data():
    # Encoded (on-disk) form of create_masked_and_scaled_data: raw int16
    # values plus the CF mask/scale attributes that decode them.
    # NOTE: the diff artifact duplicating the old list-valued return line
    # is removed; the data must be np.int16 so the encoded dtype matches
    # what the backend actually writes.
    attributes = {'_FillValue': -1, 'add_offset': 10,
                  'scale_factor': np.float32(0.1)}
    return Dataset({'x': ('t', np.int16([-1, -1, 0, 1, 2]), attributes)})


def create_unsigned_masked_scaled_data():
Expand All @@ -95,11 +95,45 @@ def create_encoded_unsigned_masked_scaled_data():
# be represented in the signed form.
attributes = {'_FillValue': -1, '_Unsigned': 'true',
'add_offset': 10, 'scale_factor': np.float32(0.1)}
# Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned
sb = np.asarray([0, 1, 127, -128, -1], dtype='i1')
return Dataset({'x': ('t', sb, attributes)})


def create_bad_unsigned_masked_scaled_data():
    # Decoded dataset whose encoding carries a malformed ``_Unsigned``
    # attribute: a Python bool (True) instead of the string 'true' that
    # the netCDF convention mandates.
    values = np.array([10.0, 10.1, 22.7, 22.8, np.nan], dtype=np.float32)
    encoding = {'_FillValue': 255, '_Unsigned': True, 'dtype': 'i1',
                'add_offset': 10, 'scale_factor': np.float32(0.1)}
    return Dataset({'x': ('t', values, {}, encoding)})


def create_bad_encoded_unsigned_masked_scaled_data():
    # On-disk form of the "bad" unsigned fixture: the _FillValue is held
    # in its signed representation and ``_Unsigned`` is (incorrectly) a
    # Python bool rather than the string 'true'.
    # The signed bytes reinterpret as [0, 1, 127, 128, 255] unsigned.
    signed_bytes = np.asarray([0, 1, 127, -128, -1], dtype='i1')
    attrs = {'_FillValue': -1, '_Unsigned': True,
             'add_offset': 10, 'scale_factor': np.float32(0.1)}
    return Dataset({'x': ('t', signed_bytes, attrs)})


def create_signed_masked_scaled_data():
    # Decoded dataset with ``_Unsigned='false'``: the on-disk bytes are
    # to be treated as ordinary signed i1 values.
    signed_encoding = {'_FillValue': -127, '_Unsigned': 'false',
                       'dtype': 'i1', 'add_offset': 10,
                       'scale_factor': np.float32(0.1)}
    decoded_values = np.array([-1.0, 10.1, 22.7, np.nan], dtype=np.float32)
    return Dataset({'x': ('t', decoded_values, {}, signed_encoding)})


def create_encoded_signed_masked_scaled_data():
    # These are values as written to the file: the _FillValue is stored
    # in its signed form and, since ``_Unsigned`` is 'false', the payload
    # bytes stay signed.
    attributes = {'_FillValue': -127, '_Unsigned': 'false',
                  'add_offset': 10, 'scale_factor': np.float32(0.1)}
    # Signed bytes that decode (scale 0.1, offset 10) to [-1.0, 10.1,
    # 22.7]; -127 is the fill value and decodes to NaN.  (The previous
    # comment about [0, 1, 127, 128, 255] unsigned was copy-pasted from
    # the unsigned fixture and did not describe this data.)
    sb = np.asarray([-110, 1, 127, -127], dtype='i1')
    return Dataset({'x': ('t', sb, attributes)})


def create_boolean_data():
    # Minimal boolean dataset carrying a units attribute.
    return Dataset(
        {'x': ('t', [True, False, False, True], {'units': '-'})})
Expand Down Expand Up @@ -617,65 +651,52 @@ def test_roundtrip_string_with_fill_value_nchar(self):
with self.roundtrip(original) as actual:
assert_identical(expected, actual)

def test_unsigned_roundtrip_mask_and_scale(self):
decoded = create_unsigned_masked_scaled_data()
encoded = create_encoded_unsigned_masked_scaled_data()
@pytest.mark.parametrize(
'decoded_fn, encoded_fn',
[(create_unsigned_masked_scaled_data,
create_encoded_unsigned_masked_scaled_data),
pytest.param(create_bad_unsigned_masked_scaled_data,
create_bad_encoded_unsigned_masked_scaled_data,
marks=pytest.mark.xfail(reason="Bad _Unsigned attribute.")),
(create_signed_masked_scaled_data,
create_encoded_signed_masked_scaled_data),
(create_masked_and_scaled_data,
create_encoded_masked_and_scaled_data)])
def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn):
decoded = decoded_fn()
encoded = encoded_fn()

with self.roundtrip(decoded) as actual:
for k in decoded.variables:
assert (decoded.variables[k].dtype ==
actual.variables[k].dtype)
assert_allclose(decoded, actual, decode_bytes=False)

with self.roundtrip(decoded,
open_kwargs=dict(decode_cf=False)) as actual:
# TODO: this assumes that all roundtrips will first
# encode. Is that something we want to test for?
for k in encoded.variables:
assert (encoded.variables[k].dtype ==
actual.variables[k].dtype)
assert_allclose(encoded, actual, decode_bytes=False)

with self.roundtrip(encoded,
open_kwargs=dict(decode_cf=False)) as actual:
for k in encoded.variables:
assert (encoded.variables[k].dtype ==
actual.variables[k].dtype)
assert_allclose(encoded, actual, decode_bytes=False)
# make sure roundtrip encoding didn't change the
# original dataset.
assert_allclose(
encoded, create_encoded_unsigned_masked_scaled_data())
with self.roundtrip(encoded) as actual:
for k in decoded.variables:
assert decoded.variables[k].dtype == \
actual.variables[k].dtype
assert_allclose(decoded, actual, decode_bytes=False)
with self.roundtrip(encoded,
open_kwargs=dict(decode_cf=False)) as actual:
for k in encoded.variables:
assert encoded.variables[k].dtype == \
actual.variables[k].dtype
assert_allclose(encoded, actual, decode_bytes=False)

def test_roundtrip_mask_and_scale(self):
decoded = create_masked_and_scaled_data()
encoded = create_encoded_masked_and_scaled_data()
with self.roundtrip(decoded) as actual:
assert_allclose(decoded, actual, decode_bytes=False)
with self.roundtrip(decoded,
open_kwargs=dict(decode_cf=False)) as actual:
# TODO: this assumes that all roundtrips will first
# encode. Is that something we want to test for?
assert_allclose(encoded, actual, decode_bytes=False)
with self.roundtrip(encoded,
open_kwargs=dict(decode_cf=False)) as actual:
assert_allclose(encoded, actual, decode_bytes=False)
# make sure roundtrip encoding didn't change the
# original dataset.
assert_allclose(encoded,
create_encoded_masked_and_scaled_data(),
decode_bytes=False)
assert_allclose(encoded, encoded_fn(), decode_bytes=False)

with self.roundtrip(encoded) as actual:
for k in decoded.variables:
assert (decoded.variables[k].dtype ==
actual.variables[k].dtype)
assert_allclose(decoded, actual, decode_bytes=False)
with self.roundtrip(encoded,
open_kwargs=dict(decode_cf=False)) as actual:
assert_allclose(encoded, actual, decode_bytes=False)

def test_coordinates_encoding(self):
def equals_latlon(obj):
Expand Down