Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions numcodecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
except ImportError: # pragma: no cover
pass

# Import the AsType codec class and pass it to register_codec.
from numcodecs.astype import AsType
register_codec(AsType)

# Import the Delta codec class and pass it to register_codec.
from numcodecs.delta import Delta
register_codec(Delta)

Expand Down
88 changes: 88 additions & 0 deletions numcodecs/astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division

import numpy as np

from numcodecs.abc import Codec
from numcodecs.compat import buffer_copy, ndarray_from_buffer



class AsType(Codec):
    """Filter to convert data between different types.

    Parameters
    ----------
    encode_dtype : dtype
        Data type to use for encoded data.
    decode_dtype : dtype
        Data type to use for decoded data.

    Notes
    -----
    If `encode_dtype` is of lower precision than `decode_dtype`, please be
    aware that data loss can occur by writing data to disk using this filter.
    No checks are made to ensure that the values fit when casting in that
    direction, so values that `encode_dtype` cannot represent may be
    corrupted.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.arange(100, 120, 2, dtype=np.int8)
    >>> x
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)
    >>> f = numcodecs.AsType(encode_dtype=x.dtype, decode_dtype=np.int64)
    >>> y = f.decode(x)
    >>> y
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118])
    >>> z = f.encode(y)
    >>> z
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)

    """  # flake8: noqa

    codec_id = 'astype'

    def __init__(self, encode_dtype, decode_dtype):
        # Normalise whatever dtype-like objects were given (strings, numpy
        # scalar types, dtype instances) into numpy dtype instances.
        self.encode_dtype = np.dtype(encode_dtype)
        self.decode_dtype = np.dtype(decode_dtype)

    def encode(self, buf):
        """Cast `buf`, interpreted as `decode_dtype`, to `encode_dtype`.

        Returns the new array produced by ``astype``.
        """
        # view input data as 1D array
        decoded = ndarray_from_buffer(buf, self.decode_dtype)

        # convert and copy
        return decoded.astype(self.encode_dtype)

    def decode(self, buf, out=None):
        """Cast `buf`, interpreted as `encode_dtype`, to `decode_dtype`.

        The converted array and `out` are handed to ``buffer_copy`` and
        its result is returned.
        """
        # view encoded data as 1D array
        encoded = ndarray_from_buffer(buf, self.encode_dtype)

        # convert and copy
        decoded = encoded.astype(self.decode_dtype)

        # handle output
        return buffer_copy(decoded, out)

    def get_config(self):
        """Return the codec configuration as a dict.

        Both dtypes are stored as their ``dtype.str`` strings.
        """
        return {
            'id': self.codec_id,
            'encode_dtype': self.encode_dtype.str,
            'decode_dtype': self.decode_dtype.str,
        }

    def __repr__(self):
        return '%s(encode_dtype=%r, decode_dtype=%r)' % (
            type(self).__name__,
            self.encode_dtype.str,
            self.decode_dtype.str,
        )
60 changes: 60 additions & 0 deletions numcodecs/tests/test_astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


import numpy as np
from numpy.testing import assert_array_equal
from nose.tools import eq_ as eq


from numcodecs.astype import AsType
from numcodecs.tests.common import check_encode_decode, check_config, \
check_repr


# Fixture arrays shared by the tests below. They cover:
#   - dtypes: signed/unsigned integers and floats
#   - shapes: 1D, 2D and 3D
#   - memory layouts: C order and Fortran order
arrays = [
    # 1D, 32-bit signed integers
    np.arange(0, 1000, dtype='i4'),
    # 2D, 64-bit floats
    np.linspace(1000, 1001, num=1000, dtype='f8').reshape((100, 10)),
    # 3D, random floats
    np.random.normal(1000, 1, (10, 10, 10)),
    # 2D, random 16-bit unsigned integers laid out in Fortran order
    np.reshape(
        np.random.randint(0, 200, size=1000, dtype='u2'),
        (100, 10),
        order='F',
    ),
]


def test_encode_decode():
    """Run check_encode_decode on each fixture array.

    The codec uses the array's own dtype for both encoding and decoding.
    """
    for data in arrays:
        dtype = data.dtype
        identity_codec = AsType(encode_dtype=dtype, decode_dtype=dtype)
        check_encode_decode(data, identity_codec)


def test_decode():
    """decode() should give the same values and dtype as a plain astype."""
    enc_dt = '<i4'
    dec_dt = '<i8'
    codec = AsType(encode_dtype=enc_dt, decode_dtype=dec_dt)

    # Input is held in the encoded dtype; the reference is a direct cast.
    encoded = np.arange(10, 20, 1, dtype=enc_dt)
    expected = encoded.astype(dec_dt)

    result = codec.decode(encoded)

    assert_array_equal(expected, result)
    eq(np.dtype(dec_dt), result.dtype)


def test_encode():
    """encode() should give the same values and dtype as a plain astype."""
    enc_dt = '<i4'
    dec_dt = '<i8'
    codec = AsType(encode_dtype=enc_dt, decode_dtype=dec_dt)

    # Input is held in the decoded dtype; the reference is a direct cast.
    decoded = np.arange(10, 20, 1, dtype=dec_dt)
    expected = decoded.astype(enc_dt)

    result = codec.encode(decoded)

    assert_array_equal(expected, result)
    eq(np.dtype(enc_dt), result.dtype)


def test_config():
    """Run check_config on an AsType codec built from '<i4' and '<i8'."""
    dtypes = dict(encode_dtype='<i4', decode_dtype='<i8')
    check_config(AsType(**dtypes))


def test_repr():
    """Run check_repr on a literal AsType repr string."""
    expected_repr = "AsType(encode_dtype='<i4', decode_dtype='<i2')"
    check_repr(expected_repr)