diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 12808c6966..cb633af27a 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -462,7 +462,7 @@ Diagnostic information about arrays and groups is available via the ``info`` pro Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) Store type : zarr.storage.DictStore No. bytes : 8000000 (7.6M) - No. bytes stored : 38482 (37.6K) + No. bytes stored : 38484 (37.6K) Storage ratio : 207.9 Chunks initialized : 10/10 diff --git a/zarr/creation.py b/zarr/creation.py index ec32de64bc..0e3e3750cc 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -100,9 +100,13 @@ def create(shape, chunks=None, dtype=None, compressor='default', # handle polymorphic store arg store = _handle_store_arg(store) - # compatibility + # API compatibility with h5py compressor, fill_value = _handle_kwargs(compressor, fill_value, kwargs) + # ensure fill_value of correct type + if fill_value is not None: + fill_value = np.array(fill_value, dtype=dtype)[()] + # initialize array metadata init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, @@ -401,9 +405,13 @@ def open_array(store=None, mode='a', shape=None, chunks=None, dtype=None, store = _handle_store_arg(store) path = normalize_storage_path(path) - # compatibility + # API compatibility with h5py compressor, fill_value = _handle_kwargs(compressor, fill_value, kwargs) + # ensure fill_value of correct type + if fill_value is not None: + fill_value = np.array(fill_value, dtype=dtype)[()] + # ensure store is initialized if mode in ['r', 'r+']: diff --git a/zarr/meta.py b/zarr/meta.py index 0eefabb0f4..59fe2d22d5 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division import json +import base64 import numpy as np @@ -40,13 +41,14 @@ def decode_array_metadata(s): def encode_array_metadata(meta): + dtype = meta['dtype'] meta = dict( zarr_format=ZARR_FORMAT, shape=meta['shape'], chunks=meta['chunks'], - dtype=encode_dtype(meta['dtype']), + dtype=encode_dtype(dtype), compressor=meta['compressor'], - fill_value=encode_fill_value(meta['fill_value']), + fill_value=encode_fill_value(meta['fill_value'], dtype), order=meta['order'], filters=meta['filters'], ) @@ -110,6 +112,9 @@ def encode_group_metadata(meta=None): def decode_fill_value(v, dtype): + # early out + if v is None: + return v if dtype.kind == 'f': if v == 'NaN': return np.nan @@ -118,13 +123,23 @@ def decode_fill_value(v, dtype): elif v == '-Infinity': return np.NINF else: + return np.array(v, dtype=dtype)[()] + elif dtype.kind == 'S': + try: + return base64.standard_b64decode(v) + except Exception: + # be lenient, allow for other values that may have been used before base64 encoding + # and may work as fill values, e.g., the number 0 return v else: return v -def encode_fill_value(v): - try: +def encode_fill_value(v, dtype): + # early out + if v is None: + return v + if dtype.kind == 'f': if np.isnan(v): return 'NaN' elif np.isposinf(v): @@ -132,6 +147,15 @@ def encode_fill_value(v): elif np.isneginf(v): return '-Infinity' else: - return v - except TypeError: + return float(v) + elif dtype.kind in 'ui': + return int(v) + elif dtype.kind == 'b': + return bool(v) + elif dtype.kind == 'S': + v = base64.standard_b64encode(v) + if not PY2: + v = str(v, 'ascii') + return v + else: return v diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index 8c1d67f2b0..d1f1814cf2 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division import json +import base64 from nose.tools import eq_ as eq, assert_is_none, assert_raises import numpy as np -from zarr.compat import binary_type, text_type +from zarr.compat import binary_type, text_type, PY2 from zarr.meta import decode_array_metadata, encode_dtype, decode_dtype, \ ZARR_FORMAT, decode_group_metadata, encode_array_metadata from zarr.errors import MetadataError @@ -113,7 +114,7 @@ def test_encode_decode_array_2(): eq([df.get_config()], meta_dec['filters']) -def test_encode_decode_array_fill_values(): +def test_encode_decode_fill_values_nan(): fills = ( (np.nan, "NaN", np.isnan), @@ -154,6 +155,47 @@ def test_encode_decode_array_fill_values(): assert f(actual) +def test_encode_decode_fill_values_bytes(): + + fills = b'foo', bytes(10) + + for v in fills: + + s = base64.standard_b64encode(v) + if not PY2: + s = str(s, 'ascii') + + meta = dict( + shape=(100,), + chunks=(10,), + dtype=np.dtype('S10'), + compressor=Zlib(1).get_config(), + fill_value=v, + filters=None, + order='C' + ) + + meta_json = '''{ + "chunks": [10], + "compressor": {"id": "zlib", "level": 1}, + "dtype": "|S10", + "fill_value": "%s", + "filters": null, + "order": "C", + "shape": [100], + "zarr_format": %s + }''' % (s, ZARR_FORMAT) + + # test encoding + meta_enc = encode_array_metadata(meta) + assert_json_eq(meta_json, meta_enc) + + # test decoding + meta_dec = decode_array_metadata(meta_enc) + actual = meta_dec['fill_value'] + eq(v, actual) + + def test_decode_array_unsupported_format(): # unsupported format