import numpy as np

from numcodecs.abc import Codec
from numcodecs.compat import buffer_copy, ndarray_from_buffer


class AsType(Codec):
    """Filter to convert data between different types.

    Parameters
    ----------
    encode_dtype : dtype
        Data type to use for encoded data.
    decode_dtype : dtype
        Data type to use for decoded data.

    Notes
    -----
    If `encode_dtype` is of lower precision than `decode_dtype`, be aware
    that data loss can occur when writing data to disk using this filter.
    No checks are made to ensure that the cast is safe in that direction;
    values that cannot be represented in `encode_dtype` will be corrupted.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.arange(100, 120, 2, dtype=np.int8)
    >>> x
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)
    >>> f = numcodecs.AsType(encode_dtype=x.dtype, decode_dtype=np.int64)
    >>> y = f.decode(x)
    >>> y
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118])
    >>> z = f.encode(y)
    >>> z
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)

    """  # flake8: noqa

    codec_id = 'astype'

    def __init__(self, encode_dtype, decode_dtype):
        # Normalise both arguments so that strings, Python types and numpy
        # dtype objects are all accepted and stored as ``np.dtype``.
        self.encode_dtype = np.dtype(encode_dtype)
        self.decode_dtype = np.dtype(decode_dtype)

    def encode(self, buf):
        """Cast `buf` from `decode_dtype` to `encode_dtype`.

        Parameters
        ----------
        buf : buffer-like
            Data to encode; it is interpreted as holding `decode_dtype`
            items.

        Returns
        -------
        enc : ndarray
            Newly allocated array of `encode_dtype`.

        """

        # view input data as 1D array
        arr = ndarray_from_buffer(buf, self.decode_dtype)

        # convert and copy; ``astype`` performs no range or precision check
        return arr.astype(self.encode_dtype)

    def decode(self, buf, out=None):
        """Cast `buf` from `encode_dtype` back to `decode_dtype`.

        Parameters
        ----------
        buf : buffer-like
            Encoded data; it is interpreted as holding `encode_dtype` items.
        out : buffer-like, optional
            Forwarded unchanged to ``buffer_copy`` together with the decoded
            array (presumably a destination buffer -- confirm against
            ``numcodecs.compat.buffer_copy``).

        Returns
        -------
        dec
            Whatever ``buffer_copy`` returns for the decoded array and `out`.

        """

        # view encoded data as 1D array
        enc = ndarray_from_buffer(buf, self.encode_dtype)

        # convert and copy
        dec = enc.astype(self.decode_dtype)

        # handle output
        return buffer_copy(dec, out)

    def get_config(self):
        """Return a dict holding the codec id and both dtype strings."""
        # ``dtype.str`` is the array-protocol type string, so the byte order
        # is part of the stored configuration.
        return {
            'id': self.codec_id,
            'encode_dtype': self.encode_dtype.str,
            'decode_dtype': self.decode_dtype.str,
        }

    def __repr__(self):
        template = '%s(encode_dtype=%r, decode_dtype=%r)'
        return template % (
            type(self).__name__,
            self.encode_dtype.str,
            self.decode_dtype.str,
        )
of shapes: 1D, 2D, 3D +# mix of orders: C, F +arrays = [ + np.arange(1000, dtype='i4'), + np.linspace(1000, 1001, 1000, dtype='f8').reshape(100, 10), + np.random.normal(loc=1000, scale=1, size=(10, 10, 10)), + np.random.randint(0, 200, size=1000, dtype='u2').reshape(100, 10, + order='F'), +] + + +def test_encode_decode(): + for arr in arrays: + codec = AsType(encode_dtype=arr.dtype, decode_dtype=arr.dtype) + check_encode_decode(arr, codec) + + +def test_decode(): + encode_dtype, decode_dtype = '