diff --git a/appveyor.yml b/appveyor.yml index 23d2231c12..4b47499976 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -40,18 +40,11 @@ environment: install: - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - - git submodule update --init --recursive build: off test_script: - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_dev.txt" - - "%CMD_IN_ENV% python setup.py build_ext --inplace" - - "%CMD_IN_ENV% python -m nose -v" - -after_test: - "%CMD_IN_ENV% python setup.py bdist_wheel" - -artifacts: - - path: dist\* + - "%CMD_IN_ENV% python -m pytest -v zarr" diff --git a/docs/release.rst b/docs/release.rst index a1568524e7..db49568f17 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -107,6 +107,17 @@ Enhancements * **New Array.hexdigest() method** computes an ``Array``'s hash with ``hashlib``. By :user:`John Kirkham `, :issue:`98`, :issue:`203`. +* **Improved support for object arrays**. In previous versions of Zarr, + creating an array with ``dtype=object`` was possible but could under certain + circumstances lead to unexpected errors and/or segmentation faults. To make it easier + to properly configure an object array, a new ``object_codec`` parameter has been + added to array creation functions. See the tutorial section on :ref:`tutorial_objects` + for more information and examples. Also, runtime checks have been added in both Zarr + and Numcodecs so that segmentation faults are no longer possible, even with a badly + configured array. This API change is backwards compatible and previous code that created + an object array and provided an object codec via the ``filters`` parameter will + continue to work, however a warning will be raised to encourage use of the + ``object_codec`` parameter. :issue:`208`, :issue:`212`. 
Bug fixes ~~~~~~~~~ diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 2a35290a53..1e46a8322f 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -178,8 +178,8 @@ print some diagnostics, e.g.:: : blocksize=0) Store type : builtins.dict No. bytes : 400000000 (381.5M) - No. bytes stored : 4565053 (4.4M) - Storage ratio : 87.6 + No. bytes stored : 3702484 (3.5M) + Storage ratio : 108.0 Chunks initialized : 100/100 If you don't specify a compressor, by default Zarr uses the Blosc @@ -270,8 +270,8 @@ Here is an example using a delta filter with the Blosc compressor:: Compressor : Blosc(cname='zstd', clevel=1, shuffle=SHUFFLE, blocksize=0) Store type : builtins.dict No. bytes : 400000000 (381.5M) - No. bytes stored : 648605 (633.4K) - Storage ratio : 616.7 + No. bytes stored : 328085 (320.4K) + Storage ratio : 1219.2 Chunks initialized : 100/100 For more information about available filter codecs, see the `Numcodecs @@ -394,8 +394,8 @@ property. E.g.:: Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) Store type : zarr.storage.DictStore No. bytes : 8000000 (7.6M) - No. bytes stored : 37480 (36.6K) - Storage ratio : 213.4 + No. bytes stored : 34840 (34.0K) + Storage ratio : 229.6 Chunks initialized : 10/10 >>> baz.info @@ -409,8 +409,8 @@ property. E.g.:: Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) Store type : zarr.storage.DictStore No. bytes : 4000000 (3.8M) - No. bytes stored : 23243 (22.7K) - Storage ratio : 172.1 + No. bytes stored : 20443 (20.0K) + Storage ratio : 195.7 Chunks initialized : 100/100 Groups also have the :func:`zarr.hierarchy.Group.tree` method, e.g.:: @@ -768,7 +768,6 @@ Here is an example using S3Map to read an array created previously:: b'Hello from the cloud!' - .. _tutorial_strings: String arrays @@ -788,40 +787,80 @@ your dataset, then you can use an array with a fixed-length bytes dtype. 
E.g.:: A fixed-length unicode dtype is also available, e.g.:: - >>> z = zarr.zeros(12, dtype='U20') >>> greetings = ['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', 'Hei maailma!', ... 'Xin chào thế giới', 'Njatjeta Botë!', 'Γεια σου κόσμε!', ... 'こんにちは世界', '世界,你好!', 'Helló, világ!', 'Zdravo svete!', ... 'เฮลโลเวิลด์'] - >>> z[:] = greetings + >>> text_data = greetings * 10000 + >>> z = zarr.array(text_data, dtype='U20') >>> z[:] - array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', 'Hei maailma!', - 'Xin chào thế giới', 'Njatjeta Botë!', 'Γεια σου κόσμε!', 'こんにちは世界', - '世界,你好!', 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], + array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ..., + 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype='>> import numcodecs - >>> z = zarr.zeros(12, dtype=object, filters=[numcodecs.Pickle()]) - >>> z[:] = greetings + >>> z = zarr.array(text_data, dtype=object, object_codec=numcodecs.JSON()) >>> z[:] - array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', 'Hei maailma!', - 'Xin chào thế giới', 'Njatjeta Botë!', 'Γεια σου κόσμε!', 'こんにちは世界', - '世界,你好!', 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object) + array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ..., + 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object) ...or alternatively using msgpack (requires `msgpack-python `_ to be installed):: - >>> z = zarr.zeros(12, dtype=object, filters=[numcodecs.MsgPack()]) - >>> z[:] = greetings + >>> z = zarr.array(text_data, dtype=object, object_codec=numcodecs.MsgPack()) + >>> z[:] + array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ..., + 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object) + +If you know ahead of time all the possible string values that can occur, then you could +also use the :class:`numcodecs.Categorize` codec to encode each unique value as an +integer. 
E.g.:: + + >>> categorize = numcodecs.Categorize(greetings, dtype=object) + >>> z = zarr.array(text_data, dtype=object, object_codec=categorize) + >>> z[:] + array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', ..., + 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object) + + +.. _tutorial_objects: + +Object arrays +------------- + +Zarr supports arrays with an "object" dtype. This allows arrays to contain any type of +object, such as variable length unicode strings, or variable length lists, or other +possibilities. When creating an object array, a codec must be provided via the +``object_codec`` argument. This codec handles encoding (serialization) of Python objects. +At the time of writing there are three codecs available that can serve as a +general purpose object codec and support encoding of a variety of +object types: :class:`numcodecs.JSON`, :class:`numcodecs.MsgPack`, and +:class:`numcodecs.Pickle`. + +For example, using the JSON codec:: + + >>> z = zarr.empty(5, dtype=object, object_codec=numcodecs.JSON()) + >>> z[0] = 42 + >>> z[1] = 'foo' + >>> z[2] = ['bar', 'baz', 'qux'] + >>> z[3] = {'a': 1, 'b': 2.2} >>> z[:] - array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', 'Hei maailma!', - 'Xin chào thế giới', 'Njatjeta Botë!', 'Γεια σου κόσμε!', 'こんにちは世界', - '世界,你好!', 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object) + array([42, 'foo', list(['bar', 'baz', 'qux']), {'a': 1, 'b': 2.2}, None], dtype=object) + +Not all codecs support encoding of all object types. The +:class:`numcodecs.Pickle` codec is the most flexible, supporting encoding any type +of Python object. However, if you are sharing data with anyone other than yourself, then +Pickle is not recommended as it is a potential security risk. This is because malicious +code can be embedded within pickled data. The JSON and MsgPack codecs do not have any +security issues and support encoding of unicode strings, lists and dictionaries. 
+MsgPack is usually faster for both encoding and decoding. + .. _tutorial_chunks: @@ -898,8 +937,8 @@ ratios, depending on the correlation structure within the data. E.g.:: Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) Store type : builtins.dict No. bytes : 400000000 (381.5M) - No. bytes stored : 26805735 (25.6M) - Storage ratio : 14.9 + No. bytes stored : 15857834 (15.1M) + Storage ratio : 25.2 Chunks initialized : 100/100 >>> f = zarr.array(a, chunks=(1000, 1000), order='F') >>> f.info @@ -912,8 +951,8 @@ ratios, depending on the correlation structure within the data. E.g.:: Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) Store type : builtins.dict No. bytes : 400000000 (381.5M) - No. bytes stored : 9633601 (9.2M) - Storage ratio : 41.5 + No. bytes stored : 7233241 (6.9M) + Storage ratio : 55.3 Chunks initialized : 100/100 In the above example, Fortran order gives a better compression ratio. This is an @@ -1014,7 +1053,7 @@ E.g., pickle/unpickle an in-memory array:: >>> import pickle >>> z1 = zarr.array(np.arange(100000)) >>> s = pickle.dumps(z1) - >>> len(s) > 10000 # relatively large because data have been pickled + >>> len(s) > 5000 # relatively large because data have been pickled True >>> z2 = pickle.loads(s) >>> z1 == z2 diff --git a/notebooks/object_arrays.ipynb b/notebooks/object_arrays.ipynb new file mode 100644 index 0000000000..714d024907 --- /dev/null +++ b/notebooks/object_arrays.ipynb @@ -0,0 +1,350 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Object arrays\n", + "\n", + "See [#212](https://github.com/alimanfoo/zarr/pull/212) for more information." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2.2.0a2.dev82+dirty'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import zarr\n", + "zarr.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.5.0'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numcodecs\n", + "numcodecs.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API changes in Zarr version 2.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creation of an object array requires providing new ``object_codec`` argument:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z = zarr.empty(10, chunks=5, dtype=object, object_codec=numcodecs.MsgPack())\n", + "z" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To maintain backwards compatibility with previously-created data, the object codec is treated as a filter and inserted as the first filter in the chain:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Typezarr.core.Array
Data typeobject
Shape(10,)
Chunk shape(5,)
OrderC
Read-onlyFalse
Filter [0]MsgPack(encoding='utf-8')
CompressorBlosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store typebuiltins.dict
No. bytes80
No. bytes stored396
Storage ratio0.2
Chunks initialized0/2
" + ], + "text/plain": [ + "Type : zarr.core.Array\n", + "Data type : object\n", + "Shape : (10,)\n", + "Chunk shape : (5,)\n", + "Order : C\n", + "Read-only : False\n", + "Filter [0] : MsgPack(encoding='utf-8')\n", + "Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)\n", + "Store type : builtins.dict\n", + "No. bytes : 80\n", + "No. bytes stored : 396\n", + "Storage ratio : 0.2\n", + "Chunks initialized : 0/2" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z.info" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['foo', 'bar', 1, list([2, 4, 6, 'baz']), {'a': 'b', 'c': 'd'}, None,\n", + " None, None, None, None], dtype=object)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z[0] = 'foo'\n", + "z[1] = b'bar' # msgpack doesn't support bytes objects correctly\n", + "z[2] = 1\n", + "z[3] = [2, 4, 6, 'baz']\n", + "z[4] = {'a': 'b', 'c': 'd'}\n", + "a = z[:]\n", + "a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If no ``object_codec`` is provided, a ``ValueError`` is raised:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "missing object_codec for object array", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzarr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/creation.py\u001b[0m in \u001b[0;36mempty\u001b[0;34m(shape, **kwargs)\u001b[0m\n\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 205\u001b[0m \"\"\"\n\u001b[0;32m--> 206\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 207\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/creation.py\u001b[0m in \u001b[0;36mcreate\u001b[0;34m(shape, chunks, dtype, compressor, fill_value, order, store, synchronizer, overwrite, path, chunk_store, filters, cache_metadata, read_only, object_codec, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,\n\u001b[1;32m 113\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moverwrite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moverwrite\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m chunk_store=chunk_store, filters=filters, object_codec=object_codec)\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m 
\u001b[0;31m# instantiate array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/storage.py\u001b[0m in \u001b[0;36minit_array\u001b[0;34m(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moverwrite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moverwrite\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[0mchunk_store\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunk_store\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfilters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 291\u001b[0;31m object_codec=object_codec)\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/storage.py\u001b[0m in \u001b[0;36m_init_array_metadata\u001b[0;34m(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec)\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfilters\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 347\u001b[0m \u001b[0;31m# there are no filters so we can be sure there is no object codec\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 348\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'missing object_codec for object array'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 349\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 350\u001b[0m \u001b[0;31m# one of the filters may be an object codec, issue a warning rather\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: missing object_codec for object array" + ] + } + ], + "source": [ + "z = zarr.empty(10, chunks=5, dtype=object)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For API backward-compatibility, if object codec is provided via filters, issue a warning but don't raise an error." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/aliman/src/github/alimanfoo/zarr/zarr/storage.py:353: FutureWarning: missing object_codec for object array; this will raise a ValueError in version 3.0\n", + " 'ValueError in version 3.0', FutureWarning)\n" + ] + } + ], + "source": [ + "z = zarr.empty(10, chunks=5, dtype=object, filters=[numcodecs.MsgPack()])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If a user tries to subvert the system and create an object array with no object codec, a runtime check is added to ensure no object arrays are passed down to the compressor (which could lead to nasty errors and/or segfaults):" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "z = zarr.empty(10, chunks=5, dtype=object, object_codec=numcodecs.MsgPack())\n", + "z._filters = None # try to live dangerously, manually wipe filters" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "cannot write object array without object codec", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + 
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'foo'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, selection, value)\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1095\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpop_fields\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1096\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1097\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36mset_basic_selection\u001b[0;34m(self, selection, value, fields)\u001b[0m\n\u001b[1;32m 1189\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_basic_selection_zd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1190\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1191\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_basic_selection_nd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1192\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1193\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_orthogonal_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_set_basic_selection_nd\u001b[0;34m(self, selection, value, fields)\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBasicIndexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1481\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1482\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1483\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1484\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_set_selection\u001b[0;34m(self, indexer, value, fields)\u001b[0m\n\u001b[1;32m 1528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[0;31m# put data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1530\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_chunk_setitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk_coords\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunk_selection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunk_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1531\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1532\u001b[0m def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_chunk_setitem\u001b[0;34m(self, chunk_coords, chunk_selection, value, fields)\u001b[0m\n\u001b[1;32m 1633\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1634\u001b[0m self._chunk_setitem_nosync(chunk_coords, chunk_selection, value,\n\u001b[0;32m-> 1635\u001b[0;31m fields=fields)\n\u001b[0m\u001b[1;32m 1636\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1637\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_chunk_setitem_nosync\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mchunk_coords\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunk_selection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_chunk_setitem_nosync\u001b[0;34m(self, chunk_coords, chunk_selection, value, fields)\u001b[0m\n\u001b[1;32m 1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1708\u001b[0m \u001b[0;31m# encode chunk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1709\u001b[0;31m \u001b[0mcdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_encode_chunk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1711\u001b[0m \u001b[0;31m# store\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_encode_chunk\u001b[0;34m(self, chunk)\u001b[0m\n\u001b[1;32m 1753\u001b[0m \u001b[0;31m# check object encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1754\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mchunk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1755\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'cannot write object array without object 
codec'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1756\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1757\u001b[0m \u001b[0;31m# compress\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: cannot write object array without object codec" + ] + } + ], + "source": [ + "z[0] = 'foo'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is another way to subvert the system, wiping filters **after** storing some data. To cover this case a runtime check is added to ensure no object arrays are handled inappropriately during decoding (which could lead to nasty errors and/or segfaults)." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['¡Hola mundo!', 'Hej Världen!', 'Servus Woid!', 'Hei maailma!',\n", + " 'Xin chào thế giới', 'Njatjeta Botë!', 'Γεια σου κόσμε!', 'こんにちは世界',\n", + " '世界,你好!', 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from numcodecs.tests.common import greetings\n", + "z = zarr.array(greetings, chunks=5, dtype=object, object_codec=numcodecs.MsgPack())\n", + "z[:]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "cannot read object array without object codec", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filters\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;31m# try to live 
dangerously, manually wipe filters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, selection)\u001b[0m\n\u001b[1;32m 551\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpop_fields\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 553\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 554\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 555\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mEllipsis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36mget_basic_selection\u001b[0;34m(self, selection, out, fields)\u001b[0m\n\u001b[1;32m 677\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 678\u001b[0m return self._get_basic_selection_nd(selection=selection, out=out,\n\u001b[0;32m--> 679\u001b[0;31m fields=fields)\n\u001b[0m\u001b[1;32m 
680\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 681\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_basic_selection_zd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_get_basic_selection_nd\u001b[0;34m(self, selection, out, fields)\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBasicIndexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 721\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 722\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_orthogonal_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_get_selection\u001b[0;34m(self, indexer, out, fields)\u001b[0m\n\u001b[1;32m 1007\u001b[0m \u001b[0;31m# load chunk selection into output array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1008\u001b[0m self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,\n\u001b[0;32m-> 1009\u001b[0;31m drop_axes=indexer.drop_axes, fields=fields)\n\u001b[0m\u001b[1;32m 1010\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1011\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_chunk_getitem\u001b[0;34m(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)\u001b[0m\n\u001b[1;32m 1597\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1598\u001b[0m \u001b[0;31m# decode chunk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1599\u001b[0;31m \u001b[0mchunk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_decode_chunk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1600\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1601\u001b[0m \u001b[0;31m# select data from chunk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_decode_chunk\u001b[0;34m(self, cdata)\u001b[0m\n\u001b[1;32m 1733\u001b[0m \u001b[0mchunk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mchunk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1734\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1735\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'cannot read object array without object codec'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1736\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[0mchunk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mchunk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: cannot read object array without object codec" + ] + } + ], + "source": [ + "z._filters = [] # try to live dangerously, manually wipe filters\n", + "z[:]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..d588d41a8a --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL + diff --git a/requirements.txt b/requirements.txt index 10478995ee..e4061c6981 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ asciitree nose +pytest numpy fasteners numcodecs diff --git a/requirements_dev.txt b/requirements_dev.txt 
index 6ac0d20562..4523bf97f2 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -15,7 +15,7 @@ mccabe==0.6.1 monotonic==1.3 msgpack-python==0.4.8 nose==1.3.7 -numcodecs==0.2.1 +numcodecs==0.4.1 numpy==1.13.3 packaging==16.8 pkginfo==1.4.1 @@ -24,6 +24,8 @@ py==1.4.34 pycodestyle==2.3.1 pyflakes==1.6.0 pyparsing==2.2.0 +pytest==3.2.3 +pytest-cov==2.5.1 requests==2.18.4 requests-toolbelt==0.8.0 setuptools-scm==1.15.6 diff --git a/setup.py b/setup.py index c420174a75..4f615d5144 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ 'asciitree', 'numpy>=1.7', 'fasteners', - 'numcodecs>=0.2.0', + 'numcodecs>=0.4.1', ], package_dir={'': '.'}, packages=['zarr', 'zarr.tests'], diff --git a/tox.ini b/tox.ini index be41364ec4..9d0db40a81 100644 --- a/tox.ini +++ b/tox.ini @@ -14,8 +14,8 @@ setenv = py27: PY_MAJOR_VERSION = py2 commands = python -c 'import glob; import shutil; import os; [(shutil.rmtree(d) if os.path.isdir(d) else os.remove(d) if os.path.isfile(d) else None) for d in glob.glob("./example*")]' - py27,py34,py35: nosetests -v --with-coverage --cover-erase --cover-package=zarr zarr - py36: nosetests -v --with-coverage --cover-erase --cover-package=zarr --with-doctest --doctest-options=+NORMALIZE_WHITESPACE,+ELLIPSIS zarr + py27,py34,py35: pytest -v --cov=zarr zarr + py36: pytest -v --cov=zarr --doctest-modules zarr coverage report -m py36: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst py36: flake8 --max-line-length=100 zarr diff --git a/zarr/core.py b/zarr/core.py index 4d33049343..ca8683ee36 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1498,7 +1498,11 @@ def _set_selection(self, indexer, value, fields=None): sel_shape = indexer.shape # check value shape - if is_scalar(value, self._dtype): + if sel_shape == (): + # setting a single item + pass + elif is_scalar(value, self._dtype): + # setting a scalar value pass else: if not hasattr(value, 'shape'): @@ -1509,7 +1513,9 @@ def _set_selection(self, 
indexer, value, fields=None): for chunk_coords, chunk_selection, out_selection in indexer: # extract data to store - if is_scalar(value, self._dtype): + if sel_shape == (): + chunk_value = value + elif is_scalar(value, self._dtype): chunk_value = value else: chunk_value = value[out_selection] @@ -1564,7 +1570,8 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, not fields and is_contiguous_selection(out_selection) and is_total_slice(chunk_selection, self._chunks) and - not self._filters): + not self._filters and + self._dtype != object): dest = out[out_selection] write_direct = ( @@ -1676,6 +1683,8 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non if self._fill_value is not None: chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) chunk.fill(self._fill_value) + elif self._dtype == object: + chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) else: # N.B., use zeros here so any region beyond the array has consistent # and compressible data @@ -1719,7 +1728,12 @@ def _decode_chunk(self, cdata): chunk = f.decode(chunk) # view as correct dtype - if isinstance(chunk, np.ndarray): + if self._dtype == object: + if isinstance(chunk, np.ndarray): + chunk = chunk.astype(self._dtype) + else: + raise RuntimeError('cannot read object array without object codec') + elif isinstance(chunk, np.ndarray): chunk = chunk.view(self._dtype) else: chunk = np.frombuffer(chunk, self._dtype) @@ -1736,6 +1750,10 @@ def _encode_chunk(self, chunk): for f in self._filters: chunk = f.encode(chunk) + # check object encoding + if isinstance(chunk, np.ndarray) and chunk.dtype == object: + raise RuntimeError('cannot write object array without object codec') + # compress if self._compressor: cdata = self._compressor.encode(chunk) diff --git a/zarr/creation.py b/zarr/creation.py index f053654830..d7241ad3c5 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -16,7 +16,8 @@ def create(shape, chunks=True, 
dtype=None, compressor='default', fill_value=0, order='C', store=None, synchronizer=None, overwrite=False, path=None, chunk_store=None, filters=None, - cache_metadata=True, read_only=False, **kwargs): + cache_metadata=True, read_only=False, object_codec=None, + **kwargs): """Create an array. Parameters @@ -55,6 +56,8 @@ def create(shape, chunks=True, dtype=None, compressor='default', overhead depending on storage and data access pattern). read_only : bool, optional True if array should be protected against modification. + object_codec : Codec, optional + A codec to encode object arrays, only needed if dtype=object. Returns ------- @@ -82,8 +85,8 @@ def create(shape, chunks=True, dtype=None, compressor='default', e.g., `MsgPack` or `Pickle` from `numcodecs`:: >>> from numcodecs import MsgPack - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='object', - ... filters=[MsgPack()]) + >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype=object, + ... object_codec=MsgPack()) >>> z @@ -108,7 +111,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', # initialize array metadata init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, overwrite=overwrite, path=path, - chunk_store=chunk_store, filters=filters) + chunk_store=chunk_store, filters=filters, object_codec=object_codec) # instantiate array z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer, @@ -340,7 +343,7 @@ def array(data, **kwargs): def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor='default', fill_value=0, order='C', synchronizer=None, filters=None, cache_metadata=True, - path=None, **kwargs): + path=None, object_codec=None, **kwargs): """Open an array using file-mode-like semantics. Parameters @@ -376,6 +379,8 @@ def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor= overhead depending on storage and data access pattern). 
path : string, optional Array path within store. + object_codec : Codec, optional + A codec to encode object arrays, only needed if dtype=object. Returns ------- @@ -432,7 +437,8 @@ def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor= elif mode == 'w': init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, - order=order, filters=filters, overwrite=True, path=path) + order=order, filters=filters, overwrite=True, path=path, + object_codec=object_codec) elif mode == 'a': if contains_group(store, path=path): @@ -440,7 +446,8 @@ def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor= elif not contains_array(store, path=path): init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, - order=order, filters=filters, path=path) + order=order, filters=filters, path=path, + object_codec=object_codec) elif mode in ['w-', 'x']: if contains_group(store, path=path): @@ -450,7 +457,8 @@ def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor= else: init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, - order=order, filters=filters, path=path) + order=order, filters=filters, path=path, + object_codec=object_codec) # determine read only status read_only = mode == 'r' diff --git a/zarr/meta.py b/zarr/meta.py index 51661eeebb..0ad91cfca3 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -64,7 +64,8 @@ def encode_array_metadata(meta): order=meta['order'], filters=meta['filters'], ) - s = json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True, separators=(',', ': ')) + s = json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True, + separators=(',', ': ')) b = s.encode('ascii') return b diff --git a/zarr/storage.py b/zarr/storage.py index de3e2224e5..bf821f82ab 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -18,6 +18,7 @@ import multiprocessing from 
threading import Lock, RLock import glob +import warnings import numpy as np @@ -173,7 +174,7 @@ def _require_parent_group(path, store, chunk_store, overwrite): def init_array(store, shape, chunks=True, dtype=None, compressor='default', fill_value=None, order='C', overwrite=False, path=None, - chunk_store=None, filters=None): + chunk_store=None, filters=None, object_codec=None): """Initialize an array store with the given configuration. Note that this is a low-level function and there should be no need to call this directly from user code. @@ -203,6 +204,8 @@ def init_array(store, shape, chunks=True, dtype=None, compressor='default', for storage of both chunks and metadata. filters : sequence, optional Sequence of filters to use to encode chunk data prior to compression. + object_codec : Codec, optional + A codec to encode object arrays, only needed if dtype=object. Examples -------- @@ -284,12 +287,13 @@ def init_array(store, shape, chunks=True, dtype=None, compressor='default', _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, overwrite=overwrite, path=path, - chunk_store=chunk_store, filters=filters) + chunk_store=chunk_store, filters=filters, + object_codec=object_codec) def _init_array_metadata(store, shape, chunks=None, dtype=None, compressor='default', fill_value=None, order='C', overwrite=False, path=None, - chunk_store=None, filters=None): + chunk_store=None, filters=None, object_codec=None): # guard conditions if overwrite: @@ -334,6 +338,26 @@ def _init_array_metadata(store, shape, chunks=None, dtype=None, compressor='defa if filters: filters_config = [f.get_config() for f in filters] else: + filters_config = [] + + # deal with object encoding + if dtype == object: + if object_codec is None: + if not filters: + # there are no filters so we can be sure there is no object codec + raise ValueError('missing object_codec for object array') + else: + # one of the filters may be an 
object codec, issue a warning rather + # than raise an error to maintain backwards-compatibility + warnings.warn('missing object_codec for object array; this will raise a ' + 'ValueError in version 3.0', FutureWarning) + else: + filters_config.insert(0, object_codec.get_config()) + elif object_codec is not None: + warnings.warn('an object_codec is only needed for object arrays') + + # use null to indicate no filters + if not filters_config: filters_config = None # initialize metadata diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 846745c6b6..140af457d9 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -6,6 +6,7 @@ import shutil import pickle import os +import warnings import numpy as np @@ -13,6 +14,7 @@ from nose.tools import (eq_ as eq, assert_is_instance, assert_raises, assert_true, assert_false, assert_is, assert_is_none) from nose import SkipTest +import pytest from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore, @@ -21,7 +23,14 @@ from zarr.errors import PermissionError from zarr.compat import PY2 from zarr.util import buffer_size -from numcodecs import Delta, FixedScaleOffset, Zlib, Blosc, BZ2 +from numcodecs import (Delta, FixedScaleOffset, Zlib, Blosc, BZ2, MsgPack, Pickle, + Categorize, JSON) + + +# needed for PY2/PY3 consistent behaviour +if PY2: # pragma: py3 no cover + warnings.resetwarnings() + warnings.simplefilter('always') # noinspection PyMethodMayBeStatic @@ -864,6 +873,127 @@ def test_dtypes(self): with assert_raises(ValueError): self.create_array(shape=10, dtype='timedelta64[{}]'.format(resolution)) + def test_object_arrays(self): + + # an object_codec is required for object arrays + with pytest.raises(ValueError): + self.create_array(shape=10, chunks=3, dtype=object) + + # an object_codec is required for object arrays, but allow to be provided via + # filters to maintain API backwards compatibility + with pytest.warns(FutureWarning): + self.create_array(shape=10, 
chunks=3, dtype=object, filters=[MsgPack()]) + + # create an object array using msgpack + z = self.create_array(shape=10, chunks=3, dtype=object, object_codec=MsgPack()) + z[0] = 'foo' + assert z[0] == 'foo' + z[1] = b'bar' + assert z[1] == 'bar' # msgpack gets this wrong + z[2] = 1 + assert z[2] == 1 + z[3] = [2, 4, 6, 'baz'] + assert z[3] == [2, 4, 6, 'baz'] + z[4] = {'a': 'b', 'c': 'd'} + assert z[4] == {'a': 'b', 'c': 'd'} + a = z[:] + assert a.dtype == object + + # create an object array using pickle + z = self.create_array(shape=10, chunks=3, dtype=object, object_codec=Pickle()) + z[0] = 'foo' + assert z[0] == 'foo' + z[1] = b'bar' + assert z[1] == b'bar' + z[2] = 1 + assert z[2] == 1 + z[3] = [2, 4, 6, 'baz'] + assert z[3] == [2, 4, 6, 'baz'] + z[4] = {'a': 'b', 'c': 'd'} + assert z[4] == {'a': 'b', 'c': 'd'} + a = z[:] + assert a.dtype == object + + # create an object array using JSON + z = self.create_array(shape=10, chunks=3, dtype=object, object_codec=JSON()) + z[0] = 'foo' + assert z[0] == 'foo' + # z[1] = b'bar' + # assert z[1] == b'bar' # not supported for JSON + z[2] = 1 + assert z[2] == 1 + z[3] = [2, 4, 6, 'baz'] + assert z[3] == [2, 4, 6, 'baz'] + z[4] = {'a': 'b', 'c': 'd'} + assert z[4] == {'a': 'b', 'c': 'd'} + a = z[:] + assert a.dtype == object + + def test_object_arrays_text(self): + + from numcodecs.tests.common import greetings + data = np.array(greetings * 1000, dtype=object) + + z = self.create_array(shape=data.shape, dtype=object, object_codec=MsgPack()) + z[:] = data + assert_array_equal(data, z[:]) + + z = self.create_array(shape=data.shape, dtype=object, object_codec=JSON()) + z[:] = data + assert_array_equal(data, z[:]) + + z = self.create_array(shape=data.shape, dtype=object, object_codec=Pickle()) + z[:] = data + assert_array_equal(data, z[:]) + + z = self.create_array(shape=data.shape, dtype=object, + object_codec=Categorize(greetings, dtype=object)) + z[:] = data + assert_array_equal(data, z[:]) + + def 
test_object_arrays_danger(self): + + # do something dangerous - manually force an object array with no object codec + z = self.create_array(shape=5, chunks=2, dtype=object, fill_value=0, + object_codec=MsgPack()) + z._filters = None # wipe filters + with assert_raises(RuntimeError): + z[0] = 'foo' + with assert_raises(RuntimeError): + z[:] = 42 + + # do something else dangerous + labels = [ + '¡Hola mundo!', + 'Hej Världen!', + 'Servus Woid!', + 'Hei maailma!', + 'Xin chào thế giới', + 'Njatjeta Botë!', + 'Γεια σου κόσμε!', + 'こんにちは世界', + '世界,你好!', + 'Helló, világ!', + 'Zdravo svete!', + 'เฮลโลเวิลด์' + ] + data = labels * 10 + for compressor in Zlib(1), Blosc(): + z = self.create_array(shape=len(data), chunks=30, dtype=object, + object_codec=Categorize(labels, dtype=object), + compressor=compressor) + z[:] = data + v = z.view(filters=[]) + with assert_raises(RuntimeError): + # noinspection PyStatementEffect + v[:] + + def test_object_codec_warnings(self): + + with pytest.warns(UserWarning): + # provide object_codec, but not object dtype + self.create_array(shape=10, chunks=5, dtype='i4', object_codec=JSON()) + class TestArrayWithPath(TestArray): @@ -1282,11 +1412,23 @@ def test_astype(self): assert_array_equal(expected, z2) def test_structured_array(self): - # don't implement this one, cannot do delta on structured array + # skip this one, cannot do delta on structured array pass def test_dtypes(self): - # don't implement this one, delta messes up floats + # skip this one, delta messes up floats + pass + + def test_object_arrays(self): + # skip this one, cannot use delta with objects + pass + + def test_object_arrays_text(self): + # skip this one, cannot use delta with objects + pass + + def test_object_arrays_danger(self): + # skip this one, cannot use delta with objects pass @@ -1380,3 +1522,7 @@ def test_cache_metadata(self): eq(300, a2.size) eq(300, a2.nbytes) eq(30, a2.nchunks) + + def test_object_arrays_danger(self): + # skip this one as it only works if 
metadata are cached + pass diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 5ef48247cf..ce71de44b0 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -9,10 +9,11 @@ import numpy as np from nose.tools import eq_ as eq, assert_is_none, assert_is_instance, assert_raises from numpy.testing import assert_array_equal +import pytest -from zarr.creation import (array, empty, zeros, ones, full, open_array, empty_like, zeros_like, - ones_like, full_like, open_like, create) +from zarr.creation import (array, empty, zeros, ones, full, open_array, empty_like, + zeros_like, ones_like, full_like, open_like, create) from zarr.sync import ThreadSynchronizer from zarr.core import Array from zarr.storage import DirectoryStore @@ -23,8 +24,9 @@ # needed for PY2/PY3 consistent behaviour -warnings.resetwarnings() -warnings.simplefilter('always') +if PY2: # pragma: py3 no cover + warnings.resetwarnings() + warnings.simplefilter('always') # something bcolz-like @@ -438,8 +440,6 @@ def test_create(): def test_compression_args(): - warnings.resetwarnings() - warnings.simplefilter('always') z = create(100, compression='zlib', compression_opts=9) assert_is_instance(z, Array) @@ -458,16 +458,12 @@ def test_compression_args(): eq('zlib', z.compressor.codec_id) eq(9, z.compressor.level) - warnings.resetwarnings() - warnings.simplefilter('error') - with assert_raises(UserWarning): + with pytest.warns(UserWarning): # 'compressor' overrides 'compression' create(100, compressor=Zlib(9), compression='bz2', compression_opts=1) - with assert_raises(UserWarning): + with pytest.warns(UserWarning): # 'compressor' ignores 'compression_opts' create(100, compressor=Zlib(9), compression_opts=1) - warnings.resetwarnings() - warnings.simplefilter('always') def test_create_read_only(): @@ -485,8 +481,8 @@ def test_create_read_only(): with assert_raises(PermissionError): z[:] = 0 - # this is subtly different, but here we want to create an array with data, and 
then have it - # be read-only + # this is subtly different, but here we want to create an array with data, and then + # have it be read-only a = np.arange(100) z = array(a, read_only=True) assert_array_equal(a, z[...]) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index e53e3fa0ef..2273568a0e 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -15,6 +15,7 @@ from nose.tools import (assert_raises, eq_ as eq, assert_is, assert_true, assert_is_instance, assert_false, assert_is_none) from nose import SkipTest +import pytest from zarr.storage import (DictStore, DirectoryStore, ZipStore, init_group, init_array, @@ -31,8 +32,9 @@ # needed for PY2/PY3 consistent behaviour -warnings.resetwarnings() -warnings.simplefilter('always') +if PY2: # pragma: py3 no cover + warnings.resetwarnings() + warnings.simplefilter('always') # noinspection PyStatementEffect @@ -363,16 +365,8 @@ def test_create_errors(self): eq(42, d.fill_value) # h5py compatibility, ignore 'shuffle' - warnings.resetwarnings() - warnings.simplefilter('always') - d = g.create_dataset('y1', shape=100, chunks=10, shuffle=True) - assert not hasattr(d, 'shuffle') - warnings.resetwarnings() - warnings.simplefilter('error') - with assert_raises(UserWarning): - g.create_dataset('y2', shape=100, chunks=10, shuffle=True) - warnings.resetwarnings() - warnings.simplefilter('always') + with pytest.warns(UserWarning, match="ignoring keyword argument 'shuffle'"): + g.create_dataset('y', shape=100, chunks=10, shuffle=True) # read-only g = self.create_group(read_only=True) @@ -406,6 +400,7 @@ def test_create_overwrite(self): # overwrite array with group d = getattr(g, method_name)('foo/bar', shape=400, chunks=40, overwrite=True) + eq((400,), d.shape) assert_is_instance(g['foo'], Group) except NotImplementedError: pass @@ -1069,6 +1064,7 @@ def test_group_completions(): def test_group_key_completions(): g = group() d = dir(g) + # noinspection PyProtectedMember k = 
g._ipython_key_completions_() # none of these names should be an attribute @@ -1103,6 +1099,7 @@ def test_group_key_completions(): g.zeros('asdf;', shape=100) d = dir(g) + # noinspection PyProtectedMember k = g._ipython_key_completions_() assert 'foo' in d diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index b52aab0cc6..d9a664c72c 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -177,6 +177,10 @@ def test_hexdigest(self): z.attrs['foo'] = 'bar' eq('05b0663ffe1785f38d3a459dec17e57a18f254af', z.hexdigest()) + def test_object_arrays_danger(self): + # skip this one, metadata get reloaded in each process + pass + def _create_group(arg): g, name = arg