diff --git a/.travis.yml b/.travis.yml index 1bc6e0decf..886ea772f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,9 @@ language: python +branches: + only: + - master + sudo: false addons: diff --git a/docs/release.rst b/docs/release.rst index 588a9e0c24..2835cbb781 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -1,6 +1,16 @@ Release notes ============= +Changes to ``__repr__``; new ``info`` property +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The string representation (``__repr__``) of array and group objects has been simplified +(`#83 `_, +`#115 `_, +`#132 `_). +Further diagnostic information can be obtained via a new ``info`` property. See the tutorial +section on :ref:`tutorial_tips_info` for examples. + .. _release_2.1.4: 2.1.4 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index dedb7d9951..12808c6966 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -20,10 +20,7 @@ example:: >>> import zarr >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + The code above creates a 2-dimensional array of 32-bit integers with 10000 rows and 10000 columns, divided into chunks where each chunk has @@ -42,11 +39,6 @@ and writing data. For example, the entire array can be filled with a scalar value:: >>> z[:] = 42 - >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict Notice that the values of ``initialized`` has changed. This is because when a Zarr array is first created, none of the chunks are initialized. @@ -90,10 +82,7 @@ enabling persistence of data between sessions. 
For example:: >>> z1 = zarr.open_array('example.zarr', mode='w', shape=(10000, 10000), ... chunks=(1000, 1000), dtype='i4', fill_value=0) >>> z1 - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: DirectoryStore + The array above will store its configuration metadata and all compressed chunk data in a directory called 'example.zarr' relative to @@ -114,10 +103,7 @@ Check that the data have been written and can be read again:: >>> z2 = zarr.open_array('example.zarr', mode='r') >>> z2 - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: DirectoryStore + >>> np.all(z1[:] == z2[:]) True @@ -151,9 +137,8 @@ which can be used to append data to any axis. E.g.:: (20000, 1000) >>> z.append(np.vstack([a, a]), axis=1) (20000, 2000) - >>> z - Array((20000, 2000), int32, chunks=(1000, 100), order=C) - ... + >>> z.shape + (20000, 2000) .. _tutorial_compress: @@ -177,11 +162,8 @@ accepted by all array creation functions. For example:: >>> z = zarr.array(np.arange(100000000, dtype='i4').reshape(10000, 10000), ... chunks=(1000, 1000), ... compressor=Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)) - >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - compressor: Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, blocksize=0) - store: dict + >>> z.compressor + Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, blocksize=0) The array above will use Blosc as the primary compressor, using the Zstandard algorithm (compression level 3) internally within Blosc, and with @@ -201,11 +183,8 @@ used. 
For example, here is an array using Zstandard compression, level 1:: >>> z = zarr.array(np.arange(100000000, dtype='i4').reshape(10000, 10000), ... chunks=(1000, 1000), ... compressor=Zstd(level=1)) - >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - compressor: Zstd(level=1) - store: dict + >>> z.compressor + Zstd(level=1) Here is an example using LZMA with a custom filter pipeline including LZMA's built-in delta filter:: @@ -217,11 +196,8 @@ LZMA's built-in delta filter:: >>> compressor = LZMA(filters=lzma_filters) >>> z = zarr.array(np.arange(100000000, dtype='i4').reshape(10000, 10000), ... chunks=(1000, 1000), compressor=compressor) - >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - compressor: LZMA(format=1, check=-1, preset=None, filters=[{'dist': 4, 'id': 3}, {'id': 33, 'preset': 1}]) - store: dict + >>> z.compressor + LZMA(format=1, check=-1, preset=None, filters=[{'dist': 4, 'id': 3}, {'id': 33, 'preset': 1}]) The default compressor can be changed by setting the value of the ``zarr.storage.default_compressor`` variable, e.g.:: @@ -231,21 +207,16 @@ The default compressor can be changed by setting the value of the >>> # switch to using Zstandard ... zarr.storage.default_compressor = Zstd(level=1) >>> z = zarr.zeros(100000000, chunks=1000000) - >>> z - Array((100000000,), float64, chunks=(1000000,), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Zstd(level=1) - store: dict + >>> z.compressor + Zstd(level=1) >>> # switch back to Blosc defaults ... 
zarr.storage.default_compressor = Blosc() To disable compression, set ``compressor=None`` when creating an array, e.g.:: >>> z = zarr.zeros(100000000, chunks=1000000, compressor=None) - >>> z - Array((100000000,), float64, chunks=(1000000,), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - store: dict + >>> z.compressor is None + True .. _tutorial_filters: @@ -272,11 +243,21 @@ Here is an example using the delta filter with the Blosc compressor: >>> z = zarr.array(np.arange(100000000, dtype='i4').reshape(10000, 10000), ... chunks=(1000, 1000), filters=filters, compressor=compressor) >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - filters: Delta(dtype=' + >>> z.info + Type : zarr.core.Array + Data type : int32 + Shape : (10000, 10000) + Chunk shape : (1000, 1000) + Order : C + Read-only : False + Filter [0] : Delta(dtype='`_ documentation. @@ -322,10 +303,7 @@ array with thread synchronization:: >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4', ... synchronizer=zarr.ThreadSynchronizer()) >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict; synchronizer: ThreadSynchronizer + This array is safe to read or write within a multi-threaded program. @@ -337,10 +315,7 @@ provided that all processes have access to a shared file system. E.g.:: ... chunks=(1000, 1000), dtype='i4', ... synchronizer=synchronizer) >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: DirectoryStore; synchronizer: ProcessSynchronizer + This array is safe to read or write from multiple processes. 
@@ -380,8 +355,7 @@ To create a group, use the :func:`zarr.hierarchy.group` function:: >>> root_group = zarr.group() >>> root_group - Group(/, 0) - store: DictStore + Groups have a similar API to the Group class from `h5py `_. For example, groups can contain other groups:: @@ -394,8 +368,7 @@ Groups can also contain arrays, e.g.:: >>> z1 = bar_group.zeros('baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4', ... compressor=zarr.Blosc(cname='zstd', clevel=1, shuffle=1)) >>> z1 - Array(/foo/bar/baz, (10000, 10000), int32, chunks=(1000, 1000), order=C) - ... + Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the :func:`zarr.hierarchy.Group.create_dataset` @@ -406,26 +379,20 @@ and :func:`zarr.hierarchy.Group.require_dataset` methods, e.g.:: ... fill_value=0, compression='gzip', ... compression_opts=1) >>> z - Array(/foo/bar/quux, (10000, 10000), int32, chunks=(1000, 1000), order=C) - ... + Members of a group can be accessed via the suffix notation, e.g.:: >>> root_group['foo'] - Group(/foo, 1) - groups: 1; bar - store: DictStore + The '/' character can be used to access multiple levels of the hierarchy, e.g.:: >>> root_group['foo/bar'] - Group(/foo/bar, 2) - arrays: 2; baz, quux - store: DictStore + >>> root_group['foo/bar/baz'] - Array(/foo/bar/baz, (10000, 10000), int32, chunks=(1000, 1000), order=C) - ... + The :func:`zarr.hierarchy.open_group` provides a convenient way to create or re-open a group stored in a directory on the file-system, with sub-groups @@ -433,14 +400,12 @@ stored in sub-directories, e.g.:: >>> persistent_group = zarr.open_group('example', mode='w') >>> persistent_group - Group(/, 0) - store: DirectoryStore + >>> z = persistent_group.create_dataset('foo/bar/baz', shape=(10000, 10000), ... chunks=(1000, 1000), dtype='i4', ... fill_value=0) >>> z - Array(/foo/bar/baz, (10000, 10000), int32, chunks=(1000, 1000), order=C) - ... 
+ For more information on groups see the :mod:`zarr.hierarchy` API docs. @@ -449,6 +414,58 @@ For more information on groups see the :mod:`zarr.hierarchy` API docs. Tips and tricks --------------- +.. _tutorial_tips_info: + +Array and group information +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Diagnostic information about arrays and groups is available via the ``info`` property. E.g.:: + + >>> root_group = zarr.group() + >>> foo_group = root_group.create_group('foo') + >>> z = foo_group.zeros('bar', shape=1000000, chunks=100000) + >>> z[:] = 42 + >>> root_group + + >>> root_group.info + Name : / + Type : zarr.hierarchy.Group + Read-only : False + Store type : zarr.storage.DictStore + No. members : 1 + No. arrays : 0 + No. groups : 1 + Groups : foo + + >>> foo_group + + >>> foo_group.info + Name : /foo + Type : zarr.hierarchy.Group + Read-only : False + Store type : zarr.storage.DictStore + No. members : 1 + No. arrays : 1 + No. groups : 0 + Arrays : bar + + >>> z + + >>> z.info + Name : /foo/bar + Type : zarr.core.Array + Data type : float64 + Shape : (1000000,) + Chunk shape : (100000,) + Order : C + Read-only : False + Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) + Store type : zarr.storage.DictStore + No. bytes : 8000000 (7.6M) + No. bytes stored : 38482 (37.6K) + Storage ratio : 207.9 + Chunks initialized : 10/10 + .. _tutorial_tips_copy: Copying large arrays @@ -478,16 +495,34 @@ compression ratios, depending on the correlation structure within the data. 
E.g.:: >>> a = np.arange(100000000, dtype='i4').reshape(10000, 10000).T - >>> zarr.array(a, chunks=(1000, 1000)) - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: 25.6M; ratio: 14.9; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict - >>> zarr.array(a, chunks=(1000, 1000), order='F') - Array((10000, 10000), int32, chunks=(1000, 1000), order=F) - nbytes: 381.5M; nbytes_stored: 9.2M; ratio: 41.5; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + >>> c = zarr.array(a, chunks=(1000, 1000)) + >>> c.info + Type : zarr.core.Array + Data type : int32 + Shape : (10000, 10000) + Chunk shape : (1000, 1000) + Order : C + Read-only : False + Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) + Store type : builtins.dict + No. bytes : 400000000 (381.5M) + No. bytes stored : 26805737 (25.6M) + Storage ratio : 14.9 + Chunks initialized : 100/100 + >>> f = zarr.array(a, chunks=(1000, 1000), order='F') + >>> f.info + Type : zarr.core.Array + Data type : int32 + Shape : (10000, 10000) + Chunk shape : (1000, 1000) + Order : F + Read-only : False + Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) + Store type : builtins.dict + No. bytes : 400000000 (381.5M) + No. bytes stored : 9633603 (9.2M) + Storage ratio : 41.5 + Chunks initialized : 100/100 In the above example, Fortran order gives a better compression ratio. This is an artifical example but illustrates the general point that changing the @@ -506,32 +541,19 @@ the store for a group or an array. 
Here is an example storing an array directly into a Zip file:: >>> store = zarr.ZipStore('example.zip', mode='w') - >>> z = zarr.zeros((1000, 1000), chunks=(100, 100), dtype='i4', store=store) - >>> z - Array((1000, 1000), int32, chunks=(100, 100), order=C) - nbytes: 3.8M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: ZipStore + >>> root_group = zarr.group(store=store) + >>> z = root_group.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') >>> z[:] = 42 - >>> z - Array((1000, 1000), int32, chunks=(100, 100), order=C) - nbytes: 3.8M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: ZipStore >>> store.close() >>> import os >>> os.path.getsize('example.zip') - 30745 + 32805 Re-open and check that data have been written:: >>> store = zarr.ZipStore('example.zip', mode='r') - >>> z = zarr.Array(store) - >>> z - Array((1000, 1000), int32, chunks=(100, 100), order=C) - nbytes: 3.8M; nbytes_stored: ...; ratio: ...; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: ZipStore + >>> root_group = zarr.group(store=store) + >>> z = root_group['foo/bar'] >>> z[:] array([[42, 42, 42, ..., 42, 42, 42], [42, 42, 42, ..., 42, 42, 42], diff --git a/notebooks/repr_info.ipynb b/notebooks/repr_info.ipynb new file mode 100644 index 0000000000..487a4175ba --- /dev/null +++ b/notebooks/repr_info.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root = zarr.group()\n", + "root" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Name/
Typezarr.hierarchy.Group
Read-onlyFalse
Store typezarr.storage.DictStore
No. members0
No. arrays0
No. groups0
" + ], + "text/plain": [ + "Name : /\n", + "Type : zarr.hierarchy.Group\n", + "Read-only : False\n", + "Store type : zarr.storage.DictStore\n", + "No. members : 0\n", + "No. arrays : 0\n", + "No. groups : 0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root.info" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "z = root.zeros('foo/bar/baz', shape=1000000)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Name/foo/bar/baz
Typezarr.core.Array
Data typefloat64
Shape(1000000,)
Chunk shape(15625,)
OrderC
Read-onlyFalse
CompressorBlosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store typezarr.storage.DictStore
No. bytes8000000 (7.6M)
No. bytes stored321
Storage ratio24922.1
Chunks initialized0/64
" + ], + "text/plain": [ + "Name : /foo/bar/baz\n", + "Type : zarr.core.Array\n", + "Data type : float64\n", + "Shape : (1000000,)\n", + "Chunk shape : (15625,)\n", + "Order : C\n", + "Read-only : False\n", + "Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)\n", + "Store type : zarr.storage.DictStore\n", + "No. bytes : 8000000 (7.6M)\n", + "No. bytes stored : 321\n", + "Storage ratio : 24922.1\n", + "Chunks initialized : 0/64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z.info" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "z[:] = 42" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Name/foo/bar/baz
Typezarr.core.Array
Data typefloat64
Shape(1000000,)
Chunk shape(15625,)
OrderC
Read-onlyFalse
CompressorBlosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store typezarr.storage.DictStore
No. bytes8000000 (7.6M)
No. bytes stored39553 (38.6K)
Storage ratio202.3
Chunks initialized64/64
" + ], + "text/plain": [ + "Name : /foo/bar/baz\n", + "Type : zarr.core.Array\n", + "Data type : float64\n", + "Shape : (1000000,)\n", + "Chunk shape : (15625,)\n", + "Order : C\n", + "Read-only : False\n", + "Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)\n", + "Store type : zarr.storage.DictStore\n", + "No. bytes : 8000000 (7.6M)\n", + "No. bytes stored : 39553 (38.6K)\n", + "Storage ratio : 202.3\n", + "Chunks initialized : 64/64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z.info" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(1000):\n", + " root.create_group(i)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Name/
Typezarr.hierarchy.Group
Read-onlyFalse
Store typezarr.storage.DictStore
No. members1001
No. arrays0
No. groups1001
Groups0, 1, 10, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 11, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 12, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 13, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 14, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 15, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 16, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 17, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 19, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 2, 20, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 21, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 22, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 23, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 24, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 25, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 26, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 27, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 28, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 29, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 3, 30, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 31, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 32, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 33, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 34, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 35, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 36, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 37, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 38, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 39, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 4, 40, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 41, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 42, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 43, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 44, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 45, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 46, 460, 461, 462, 463, 464, 465, 
466, 467, 468, 469, 47, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 48, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 49, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 5, 50, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 51, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 52, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 53, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 54, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 55, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 56, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 57, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 58, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 59, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 6, 60, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 61, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 62, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 63, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 64, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 65, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 66, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 67, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 68, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 69, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 7, 70, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 71, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 72, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 73, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 74, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 75, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 76, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 77, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 78, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 79, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 8, 80, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 81, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 82, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 83, 830, 831, 832, 833, 
834, 835, 836, 837, 838, 839, 84, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 85, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 86, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 87, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 88, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 89, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 9, 90, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 91, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 92, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 93, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 94, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 95, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 96, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 97, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 98, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 99, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, foo
" + ], + "text/plain": [ + "Name : /\n", + "Type : zarr.hierarchy.Group\n", + "Read-only : False\n", + "Store type : zarr.storage.DictStore\n", + "No. members : 1001\n", + "No. arrays : 0\n", + "No. groups : 1001\n", + "Groups : 0, 1, 10, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 11,\n", + " : 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 12, 120, 121,\n", + " : 122, 123, 124, 125, 126, 127, 128, 129, 13, 130, 131, 132, 133,\n", + " : 134, 135, 136, 137, 138, 139, 14, 140, 141, 142, 143, 144, 145,\n", + " : 146, 147, 148, 149, 15, 150, 151, 152, 153, 154, 155, 156, 157,\n", + " : 158, 159, 16, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,\n", + " : 17, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18, 180,\n", + " : 181, 182, 183, 184, 185, 186, 187, 188, 189, 19, 190, 191, 192,\n", + " : 193, 194, 195, 196, 197, 198, 199, 2, 20, 200, 201, 202, 203, 204,\n", + " : 205, 206, 207, 208, 209, 21, 210, 211, 212, 213, 214, 215, 216,\n", + " : 217, 218, 219, 22, 220, 221, 222, 223, 224, 225, 226, 227, 228,\n", + " : 229, 23, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 24,\n", + " : 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 25, 250, 251,\n", + " : 252, 253, 254, 255, 256, 257, 258, 259, 26, 260, 261, 262, 263,\n", + " : 264, 265, 266, 267, 268, 269, 27, 270, 271, 272, 273, 274, 275,\n", + " : 276, 277, 278, 279, 28, 280, 281, 282, 283, 284, 285, 286, 287,\n", + " : 288, 289, 29, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 3,\n", + " : 30, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 31, 310,\n", + " : 311, 312, 313, 314, 315, 316, 317, 318, 319, 32, 320, 321, 322,\n", + " : 323, 324, 325, 326, 327, 328, 329, 33, 330, 331, 332, 333, 334,\n", + " : 335, 336, 337, 338, 339, 34, 340, 341, 342, 343, 344, 345, 346,\n", + " : 347, 348, 349, 35, 350, 351, 352, 353, 354, 355, 356, 357, 358,\n", + " : 359, 36, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 37,\n", + " : 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 38, 380, 
381,\n", + " : 382, 383, 384, 385, 386, 387, 388, 389, 39, 390, 391, 392, 393,\n", + " : 394, 395, 396, 397, 398, 399, 4, 40, 400, 401, 402, 403, 404, 405,\n", + " : 406, 407, 408, 409, 41, 410, 411, 412, 413, 414, 415, 416, 417,\n", + " : 418, 419, 42, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429,\n", + " : 43, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 44, 440,\n", + " : 441, 442, 443, 444, 445, 446, 447, 448, 449, 45, 450, 451, 452,\n", + " : 453, 454, 455, 456, 457, 458, 459, 46, 460, 461, 462, 463, 464,\n", + " : 465, 466, 467, 468, 469, 47, 470, 471, 472, 473, 474, 475, 476,\n", + " : 477, 478, 479, 48, 480, 481, 482, 483, 484, 485, 486, 487, 488,\n", + " : 489, 49, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 5, 50,\n", + " : 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 51, 510, 511,\n", + " : 512, 513, 514, 515, 516, 517, 518, 519, 52, 520, 521, 522, 523,\n", + " : 524, 525, 526, 527, 528, 529, 53, 530, 531, 532, 533, 534, 535,\n", + " : 536, 537, 538, 539, 54, 540, 541, 542, 543, 544, 545, 546, 547,\n", + " : 548, 549, 55, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559,\n", + " : 56, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 57, 570,\n", + " : 571, 572, 573, 574, 575, 576, 577, 578, 579, 58, 580, 581, 582,\n", + " : 583, 584, 585, 586, 587, 588, 589, 59, 590, 591, 592, 593, 594,\n", + " : 595, 596, 597, 598, 599, 6, 60, 600, 601, 602, 603, 604, 605, 606,\n", + " : 607, 608, 609, 61, 610, 611, 612, 613, 614, 615, 616, 617, 618,\n", + " : 619, 62, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 63,\n", + " : 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 64, 640, 641,\n", + " : 642, 643, 644, 645, 646, 647, 648, 649, 65, 650, 651, 652, 653,\n", + " : 654, 655, 656, 657, 658, 659, 66, 660, 661, 662, 663, 664, 665,\n", + " : 666, 667, 668, 669, 67, 670, 671, 672, 673, 674, 675, 676, 677,\n", + " : 678, 679, 68, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689,\n", + " : 69, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 
7, 70, 700,\n", + " : 701, 702, 703, 704, 705, 706, 707, 708, 709, 71, 710, 711, 712,\n", + " : 713, 714, 715, 716, 717, 718, 719, 72, 720, 721, 722, 723, 724,\n", + " : 725, 726, 727, 728, 729, 73, 730, 731, 732, 733, 734, 735, 736,\n", + " : 737, 738, 739, 74, 740, 741, 742, 743, 744, 745, 746, 747, 748,\n", + " : 749, 75, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 76,\n", + " : 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 77, 770, 771,\n", + " : 772, 773, 774, 775, 776, 777, 778, 779, 78, 780, 781, 782, 783,\n", + " : 784, 785, 786, 787, 788, 789, 79, 790, 791, 792, 793, 794, 795,\n", + " : 796, 797, 798, 799, 8, 80, 800, 801, 802, 803, 804, 805, 806, 807,\n", + " : 808, 809, 81, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819,\n", + " : 82, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 83, 830,\n", + " : 831, 832, 833, 834, 835, 836, 837, 838, 839, 84, 840, 841, 842,\n", + " : 843, 844, 845, 846, 847, 848, 849, 85, 850, 851, 852, 853, 854,\n", + " : 855, 856, 857, 858, 859, 86, 860, 861, 862, 863, 864, 865, 866,\n", + " : 867, 868, 869, 87, 870, 871, 872, 873, 874, 875, 876, 877, 878,\n", + " : 879, 88, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 89,\n", + " : 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 9, 90, 900, 901,\n", + " : 902, 903, 904, 905, 906, 907, 908, 909, 91, 910, 911, 912, 913,\n", + " : 914, 915, 916, 917, 918, 919, 92, 920, 921, 922, 923, 924, 925,\n", + " : 926, 927, 928, 929, 93, 930, 931, 932, 933, 934, 935, 936, 937,\n", + " : 938, 939, 94, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949,\n", + " : 95, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 96, 960,\n", + " : 961, 962, 963, 964, 965, 966, 967, 968, 969, 97, 970, 971, 972,\n", + " : 973, 974, 975, 976, 977, 978, 979, 98, 980, 981, 982, 983, 984,\n", + " : 985, 986, 987, 988, 989, 99, 990, 991, 992, 993, 994, 995, 996,\n", + " : 997, 998, 999, foo" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "root.info" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Name/foo/bar
Typezarr.hierarchy.Group
Read-onlyFalse
Store typezarr.storage.DictStore
No. members1
No. arrays1
No. groups0
Arraysbaz
" + ], + "text/plain": [ + "Name : /foo/bar\n", + "Type : zarr.hierarchy.Group\n", + "Read-only : False\n", + "Store type : zarr.storage.DictStore\n", + "No. members : 1\n", + "No. arrays : 1\n", + "No. groups : 0\n", + "Arrays : baz" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "root['foo/bar'].info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements_dev.txt b/requirements_dev.txt index 24232db51d..d273262177 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,28 +1,34 @@ -# pinned dependencies for the development and CI environment -appdirs +appdirs==1.4.3 args==0.1.0 +certifi==2017.7.27.1 +chardet==3.0.4 clint==0.5.1 -coverage==4.3.4 -Cython==0.25.2 +coverage==4.4.1 +coveralls==1.2.0 +Cython==0.27.2 +docopt==0.6.2 fasteners==0.14.1 -flake8==3.3.0 +flake8==3.5.0 +idna==2.6 mccabe==0.6.1 -monotonic==1.2 +monotonic==1.3 msgpack-python==0.4.8 nose==1.3.7 -numcodecs==0.2.0 -numpy==1.12.0 -packaging +numcodecs==0.2.1 +numpy==1.13.3 +packaging==16.8 pkginfo==1.4.1 -pluggy==0.4.0 -py==1.4.32 +pluggy==0.5.2 +py==1.4.34 pycodestyle==2.3.1 -pyflakes==1.5.0 -pyparsing==2.1.10 -requests==2.13.0 -requests-toolbelt==0.7.1 -setuptools-scm==1.15.0 -six -tox==2.6.0 -twine==1.8.1 +pyflakes==1.6.0 +pyparsing==2.2.0 +requests==2.18.4 +requests-toolbelt==0.8.0 +setuptools-scm==1.15.6 +tox==2.9.1 +tox-travis==0.8 +tqdm==4.19.4 +twine==1.9.1 +urllib3==1.22 virtualenv==15.1.0 diff --git a/tox.ini b/tox.ini 
index 1bc6aca0e1..f98a40b917 100644 --- a/tox.ini +++ b/tox.ini @@ -10,13 +10,11 @@ envlist = py27, py34, py35, py36, docs setenv = PYTHONHASHSEED = 42 commands = - python setup.py build_ext --inplace - py27,py34,py35: nosetests -v --with-coverage --cover-erase --cover-package=zarr zarr + py27,py34,py35: nosetests -v zarr py36: nosetests -v --with-coverage --cover-erase --cover-package=zarr --with-doctest --doctest-options=+NORMALIZE_WHITESPACE,+ELLIPSIS zarr py36: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst py36: flake8 --max-line-length=100 zarr - python setup.py bdist_wheel - coverage report -m + py36: coverage report -m deps = -rrequirements_dev.txt diff --git a/zarr/core.py b/zarr/core.py index b7f416d3f8..eb054e4e6b 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -7,9 +7,9 @@ import numpy as np -from zarr.util import is_total_slice, normalize_array_selection, \ - get_chunk_range, human_readable_size, normalize_resize_args, \ - normalize_storage_path, normalize_shape, normalize_chunks +from zarr.util import is_total_slice, normalize_array_selection, get_chunk_range, \ + human_readable_size, normalize_resize_args, normalize_storage_path, normalize_shape, \ + normalize_chunks, InfoReporter from zarr.storage import array_meta_key, attrs_key, listdir, getsize from zarr.meta import decode_array_metadata, encode_array_metadata from zarr.attrs import Attributes @@ -65,6 +65,7 @@ class Array(object): nchunks nchunks_initialized is_view + info Methods ------- @@ -75,7 +76,7 @@ class Array(object): view astype - """ # flake8: noqa + """ def __init__(self, store, path=None, read_only=False, chunk_store=None, synchronizer=None, cache_metadata=True): @@ -83,16 +84,13 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, # configuration metadata fully specified and normalized self._store = store + self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: self._key_prefix = 
self._path + '/' else: self._key_prefix = '' self._read_only = read_only - if chunk_store is None: - self._chunk_store = store - else: - self._chunk_store = chunk_store self._synchronizer = synchronizer self._cache_metadata = cache_metadata self._is_view = False @@ -105,6 +103,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._attrs = Attributes(store, key=akey, read_only=read_only, synchronizer=synchronizer) + # initialize info reporter + self._info_reporter = InfoReporter(self) + def _load_metadata(self): """(Re)load metadata from store.""" if self._synchronizer is None: @@ -198,9 +199,11 @@ def read_only(self): @property def chunk_store(self): - """A MutableMapping providing the underlying storage for array - chunks.""" - return self._chunk_store + """A MutableMapping providing the underlying storage for array chunks.""" + if self._chunk_store is None: + return self._store + else: + return self._chunk_store @property def shape(self): @@ -299,7 +302,7 @@ def nbytes_stored(self): includes storage required for configuration metadata and user attributes.""" m = getsize(self._store, self._path) - if self._store == self._chunk_store: + if self._chunk_store is None: return m else: n = getsize(self._chunk_store, self._path) @@ -333,7 +336,8 @@ def nchunks(self): @property def nchunks_initialized(self): """The number of chunks that have been initialized with some data.""" - return sum(1 for k in listdir(self._chunk_store, self._path) + # TODO fix bug here, need to only count chunks + return sum(1 for k in listdir(self.chunk_store, self._path) if k not in [array_meta_key, attrs_key]) # backwards compability @@ -382,10 +386,7 @@ def __getitem__(self, item): >>> import numpy as np >>> z = zarr.array(np.arange(100000000), chunks=1000000, dtype='i4') >>> z - Array((100000000,), int32, chunks=(1000000,), order=C) - nbytes: 381.5M; nbytes_stored: 6.4M; ratio: 59.9; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, 
blocksize=0) - store: dict + Take some slices:: @@ -407,10 +408,7 @@ def __getitem__(self, item): >>> z = zarr.array(np.arange(100000000).reshape(10000, 10000), ... chunks=(1000, 1000), dtype='i4') >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: 9.2M; ratio: 41.5; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + Take some slices:: @@ -439,7 +437,7 @@ def __getitem__(self, item): [99980000, 99980001, 99980002, ..., 99989997, 99989998, 99989999], [99990000, 99990001, 99990002, ..., 99999997, 99999998, 99999999]], dtype=int32) - """ # flake8: noqa + """ # refresh metadata if not self._cache_metadata: @@ -505,10 +503,7 @@ def __setitem__(self, item, value): >>> import zarr >>> z = zarr.zeros(100000000, chunks=1000000, dtype='i4') >>> z - Array((100000000,), int32, chunks=(1000000,), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + Set all array elements to the same scalar value:: @@ -527,10 +522,7 @@ def __setitem__(self, item, value): >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4') >>> z - Array((10000, 10000), int32, chunks=(1000, 1000), order=C) - nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + Set all array elements to the same scalar value:: @@ -636,7 +628,7 @@ def _chunk_getitem(self, cidx, item, dest): # obtain compressed data for chunk ckey = self._chunk_key(cidx) - cdata = self._chunk_store[ckey] + cdata = self.chunk_store[ckey] except KeyError: @@ -738,7 +730,7 @@ def _chunk_setitem_nosync(self, cidx, item, value): try: # obtain compressed data for chunk - cdata = self._chunk_store[ckey] + cdata = self.chunk_store[ckey] except KeyError: @@ -761,7 +753,7 @@ def _chunk_setitem_nosync(self, cidx, item, value): 
cdata = self._encode_chunk(chunk) # store - self._chunk_store[ckey] = cdata + self.chunk_store[ckey] = cdata def _chunk_key(self, cidx): return self._key_prefix + '.'.join(map(str, cidx)) @@ -806,55 +798,99 @@ def _encode_chunk(self, chunk): return cdata def __repr__(self): - # N.B., __repr__ needs to be synchronized to ensure consistent view - # of metadata AND when retrieving nbytes_stored from filesystem storage - return self._synchronized_op(self._repr_nosync) + t = type(self) + r = '<%s.%s' % (t.__module__, t.__name__) + if self.name: + r += ' %r' % self.name + r += ' %s' % str(self.shape) + r += ' %s' % self.dtype + r += '>' + return r + + @property + def info(self): + """Report some diagnostic information about the array. - def _repr_nosync(self): + Examples + -------- + >>> import zarr + >>> z = zarr.zeros(1000000, chunks=100000, dtype='i4') + >>> z.info + Type : zarr.core.Array + Data type : int32 + Shape : (1000000,) + Chunk shape : (100000,) + Order : C + Read-only : False + Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) + Store type : builtins.dict + No. bytes : 4000000 (3.8M) + No. bytes stored : ... + Storage ratio : ... 
+ Chunks initialized : 0/10 - # main line - r = '%s(' % type(self).__name__ - if self.name: - r += '%s, ' % self.name - r += '%s, ' % str(self._shape) - r += '%s, ' % str(self._dtype) - r += 'chunks=%s, ' % str(self._chunks) - r += 'order=%s' % self._order - r += ')' - - # storage size info - r += '\n nbytes: %s' % human_readable_size(self._nbytes) - if self.nbytes_stored > 0: - r += '; nbytes_stored: %s' % human_readable_size( - self.nbytes_stored) - r += '; ratio: %.1f' % (self._nbytes / self.nbytes_stored) - r += '; initialized: %s/%s' % (self.nchunks_initialized, - self._nchunks) + """ + return self._info_reporter + + def info_items(self): + return self._synchronized_op(self._info_items_nosync) + + def _info_items_nosync(self): + + def typestr(o): + return '%s.%s' % (type(o).__module__, type(o).__name__) + + def bytestr(n): + if n > 2**10: + return '%s (%s)' % (n, human_readable_size(n)) + else: + return str(n) + + items = [] + + # basic info + if self.name is not None: + items += [('Name', self.name)] + items += [ + ('Type', typestr(self)), + ('Data type', '%s' % self.dtype), + ('Shape', str(self.shape)), + ('Chunk shape', str(self.chunks)), + ('Order', self.order), + ('Read-only', str(self.read_only)), + ] # filters - if self._filters: - # first line - r += '\n filters: %r' % self._filters[0] - # subsequent lines - for f in self._filters[1:]: - r += '\n %r' % f + if self.filters: + for i, f in enumerate(self.filters): + items += [('Filter [%s]' % i, repr(f))] # compressor - if self._compressor: - r += '\n compressor: %r' % self._compressor + items += [('Compressor', repr(self.compressor))] - # storage and synchronizer classes - r += '\n store: %s' % type(self._store).__name__ - if self._store != self._chunk_store: - r += '; chunk_store: %s' % type(self._chunk_store).__name__ + # synchronizer if self._synchronizer is not None: - r += '; synchronizer: %s' % type(self._synchronizer).__name__ + items += [('Synchronizer type', typestr(self._synchronizer))] - 
return r + # storage info + items += [('Store type', typestr(self._store))] + if self._chunk_store is not None: + items += [('Chunk store type', typestr(self._chunk_store))] + items += [('No. bytes', bytestr(self.nbytes))] + if self.nbytes_stored > 0: + items += [ + ('No. bytes stored', bytestr(self.nbytes_stored)), + ('Storage ratio', '%.1f' % (self.nbytes / self.nbytes_stored)), + ] + items += [ + ('Chunks initialized', '%s/%s' % (self.nchunks_initialized, self.nchunks)) + ] + + return items def __getstate__(self): - return self._store, self._path, self._read_only, self._chunk_store, \ - self._synchronizer, self._cache_metadata + return self._store, self._path, self._read_only, self._chunk_store, self._synchronizer, \ + self._cache_metadata def __setstate__(self, state): self.__init__(*state) @@ -906,7 +942,7 @@ def resize(self, *args): If one or more dimensions are shrunk, any chunks falling outside the new array shape will be deleted from the underlying store. - """ # flake8: noqa + """ return self._write_op(self._resize_nosync, *args) @@ -927,13 +963,14 @@ def _resize_nosync(self, *args): for s, c in zip(new_shape, chunks)) # remove any chunks not within range + chunk_store = self.chunk_store for cidx in itertools.product(*[range(n) for n in old_cdata_shape]): if all(i < c for i, c in zip(cidx, new_cdata_shape)): pass # keep the chunk else: key = self._chunk_key(cidx) try: - del self._chunk_store[key] + del chunk_store[key] except KeyError: # chunk not initialized pass @@ -970,8 +1007,7 @@ def append(self, data, axis=0): >>> z.append(np.vstack([a, a]), axis=1) (20000, 2000) >>> z - Array((20000, 2000), int32, chunks=(1000, 100), order=C) - ... + """ return self._write_op(self._append_nosync, data, axis=axis) @@ -1120,7 +1156,7 @@ def view(self, shape=None, chunks=None, dtype=None, ... 
print(e) not permitted for views - """ # flake8: noqa + """ store = self._store chunk_store = self._chunk_store @@ -1129,9 +1165,8 @@ def view(self, shape=None, chunks=None, dtype=None, read_only = self._read_only if synchronizer is None: synchronizer = self._synchronizer - a = Array(store=store, path=path, chunk_store=chunk_store, - read_only=read_only, synchronizer=synchronizer, - cache_metadata=True) + a = Array(store=store, path=path, chunk_store=chunk_store, read_only=read_only, + synchronizer=synchronizer, cache_metadata=True) a._is_view = True # allow override of some properties @@ -1204,7 +1239,7 @@ def astype(self, dtype): 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., 90., 91., 92., 93., 94., 95., 96., 97., 98., 99.], dtype=float32) - """ # flake8: noqa + """ dtype = np.dtype(dtype) diff --git a/zarr/creation.py b/zarr/creation.py index 0b28a23f25..c40977eb43 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -66,10 +66,7 @@ def create(shape, chunks=None, dtype=None, compressor='default', >>> import zarr >>> z = zarr.create((10000, 10000), chunks=(1000, 1000)) >>> z - Array((10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + Create an array with different some different configuration options:: @@ -77,10 +74,7 @@ def create(shape, chunks=None, dtype=None, compressor='default', >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='i1', order='F', ... 
compressor=Blosc(cname='zstd', clevel=1, shuffle=Blosc.BITSHUFFLE)) >>> z - Array((10000, 10000), int8, chunks=(1000, 1000), order=F) - nbytes: 95.4M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='zstd', clevel=1, shuffle=BITSHUFFLE, blocksize=0) - store: dict + To create an array with object dtype requires a filter that can handle Python object encoding, e.g., `MsgPack` or `Pickle` from `numcodecs`:: @@ -89,11 +83,7 @@ def create(shape, chunks=None, dtype=None, compressor='default', >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='object', ... filters=[MsgPack()]) >>> z - Array((10000, 10000), object, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - filters: MsgPack(encoding='utf-8') - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + Example with some filters, and also storing chunks separately from metadata:: @@ -103,14 +93,9 @@ def create(shape, chunks=None, dtype=None, compressor='default', ... filters=[Quantize(digits=2, dtype='f8'), Adler32()], ... 
store=store, chunk_store=chunk_store) >>> z - Array((10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - filters: Quantize(digits=2, dtype=' - """ # flake8: noqa + """ # handle polymorphic store arg store = _handle_store_arg(store) @@ -222,15 +207,12 @@ def zeros(shape, **kwargs): >>> import zarr >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000)) >>> z - Array((10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + >>> z[:2, :2] array([[ 0., 0.], [ 0., 0.]]) - """ # flake8: noqa + """ return create(shape=shape, fill_value=0, **kwargs) @@ -246,15 +228,12 @@ def ones(shape, **kwargs): >>> import zarr >>> z = zarr.ones((10000, 10000), chunks=(1000, 1000)) >>> z - Array((10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + >>> z[:2, :2] array([[ 1., 1.], [ 1., 1.]]) - """ # flake8: noqa + """ return create(shape=shape, fill_value=1, **kwargs) @@ -270,15 +249,12 @@ def full(shape, fill_value, **kwargs): >>> import zarr >>> z = zarr.full((10000, 10000), chunks=(1000, 1000), fill_value=42) >>> z - Array((10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + >>> z[:2, :2] array([[ 42., 42.], [ 42., 42.]]) - """ # flake8: noqa + """ return create(shape=shape, fill_value=fill_value, **kwargs) @@ -316,12 +292,9 @@ def array(data, **kwargs): >>> a = np.arange(100000000).reshape(10000, 10000) >>> z = zarr.array(a, chunks=(1000, 1000)) >>> z - Array((10000, 10000), int64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: 
15.2M; ratio: 50.2; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: dict + - """ # flake8: noqa + """ # ensure data is array-like if not hasattr(data, 'shape') or not hasattr(data, 'dtype'): @@ -403,16 +376,10 @@ def open_array(store=None, mode='a', shape=None, chunks=None, dtype=None, ... chunks=(1000, 1000), fill_value=0) >>> z1[:] = np.arange(100000000).reshape(10000, 10000) >>> z1 - Array((10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: 23.0M; ratio: 33.2; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: DirectoryStore + >>> z2 = zarr.open_array('example.zarr', mode='r') >>> z2 - Array((10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: 23.0M; ratio: 33.2; initialized: 100/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: DirectoryStore + >>> np.all(z1[:] == z2[:]) True @@ -421,7 +388,7 @@ def open_array(store=None, mode='a', shape=None, chunks=None, dtype=None, There is no need to close an array. Data are automatically flushed to the file system. 
- """ # flake8: noqa + """ # use same mode semantics as h5py # r : read only, must exist diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 42f9b93c0b..937c53d12c 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -13,9 +13,8 @@ DictStore, DirectoryStore, group_meta_key, attrs_key, listdir, rmdir from zarr.creation import array, create, empty, zeros, ones, full, \ empty_like, zeros_like, ones_like, full_like -from zarr.util import normalize_storage_path, normalize_shape -from zarr.errors import PermissionError, err_contains_array, \ - err_contains_group, err_group_not_found, err_read_only +from zarr.util import normalize_storage_path, normalize_shape, InfoReporter +from zarr.errors import err_contains_array, err_contains_group, err_group_not_found, err_read_only from zarr.meta import decode_group_metadata @@ -45,6 +44,7 @@ class Group(MutableMapping): chunk_store synchronizer attrs + info Methods ------- @@ -79,20 +79,16 @@ class Group(MutableMapping): """ - def __init__(self, store, path=None, read_only=False, chunk_store=None, - synchronizer=None): + def __init__(self, store, path=None, read_only=False, chunk_store=None, synchronizer=None): self._store = store + self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: self._key_prefix = self._path + '/' else: self._key_prefix = '' self._read_only = read_only - if chunk_store is None: - self._chunk_store = store - else: - self._chunk_store = chunk_store self._synchronizer = synchronizer # guard conditions @@ -114,6 +110,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._attrs = Attributes(store, key=akey, read_only=read_only, synchronizer=synchronizer) + # setup info + self.info = InfoReporter(self) + @property def store(self): """A MutableMapping providing the underlying storage for the group.""" @@ -142,9 +141,11 @@ def read_only(self): @property def chunk_store(self): - """A MutableMapping providing the underlying storage for array - 
chunks.""" - return self._chunk_store + """A MutableMapping providing the underlying storage for array chunks.""" + if self._chunk_store is None: + return self._store + else: + return self._chunk_store @property def synchronizer(self): @@ -197,54 +198,61 @@ def __len__(self): return sum(1 for _ in self) def __repr__(self): + t = type(self) + r = '<%s.%s' % (t.__module__, t.__name__) + if self.name: + r += ' %r' % self.name + r += '>' + return r + + def info_items(self): + + def typestr(o): + return '%s.%s' % (type(o).__module__, type(o).__name__) + + items = [] - # main line - r = '%s(' % type(self).__name__ - r += self.name + ', ' - r += str(len(self)) - r += ')' + # basic info + if self.name is not None: + items += [('Name', self.name)] + items += [ + ('Type', typestr(self)), + ('Read-only', str(self.read_only)), + ] + + # synchronizer + if self._synchronizer is not None: + items += [('Synchronizer type', typestr(self._synchronizer))] + + # storage info + items += [('Store type', typestr(self._store))] + if self._chunk_store is not None: + items += [('Chunk store type', typestr(self._chunk_store))] # members - array_keys = list(self.array_keys()) + items += [('No. members', len(self))] + array_keys = sorted(self.array_keys()) + group_keys = sorted(self.group_keys()) + items += [('No. arrays', len(array_keys))] + items += [('No. groups', len(group_keys))] if array_keys: - arrays_line = '\n arrays: %s; %s' % \ - (len(array_keys), ', '.join(array_keys)) - if len(arrays_line) > 80: - arrays_line = arrays_line[:77] + '...' - r += arrays_line - group_keys = list(self.group_keys()) + items += [('Arrays', ', '.join(array_keys))] if group_keys: - groups_line = '\n groups: %s; %s' % \ - (len(group_keys), ', '.join(group_keys)) - if len(groups_line) > 80: - groups_line = groups_line[:77] + '...' 
- r += groups_line - - # storage and synchronizer classes - r += '\n store: %s' % type(self.store).__name__ - if self.store != self.chunk_store: - r += '; chunk_store: %s' % type(self.chunk_store).__name__ - if self.synchronizer is not None: - r += '; synchronizer: %s' % type(self.synchronizer).__name__ + items += [('Groups', ', '.join(group_keys))] - return r + return items def __getstate__(self): - return self._store, self._path, self._read_only, self._chunk_store, \ - self._synchronizer + return self._store, self._path, self._read_only, self._chunk_store, self._synchronizer def __setstate__(self, state): self.__init__(*state) def _item_path(self, item): - if item and item[0] == '/': - # absolute path - path = normalize_storage_path(item) - else: - # relative path - path = normalize_storage_path(item) - if self._path: - path = self._key_prefix + path + absolute = isinstance(item, str) and item and item[0] == '/' + path = normalize_storage_path(item) + if not absolute and self._path: + path = self._key_prefix + path return path def __contains__(self, item): @@ -282,29 +290,20 @@ def __getitem__(self, item): >>> g1 = zarr.group() >>> d1 = g1.create_dataset('foo/bar/baz', shape=100, chunks=10) >>> g1['foo'] - Group(/foo, 1) - groups: 1; bar - store: DictStore + >>> g1['foo/bar'] - Group(/foo/bar, 1) - arrays: 1; baz - store: DictStore + >>> g1['foo/bar/baz'] - Array(/foo/bar/baz, (100,), float64, chunks=(10,), order=C) - nbytes: 800; nbytes_stored: ...; ratio: ...; initialized: 0/10 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: DictStore + - """ # flake8: noqa + """ path = self._item_path(item) if contains_array(self._store, path): return Array(self._store, read_only=self._read_only, path=path, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer) + chunk_store=self._chunk_store, synchronizer=self._synchronizer) elif contains_group(self._store, path): return Group(self._store, read_only=self._read_only, path=path, - 
chunk_store=self._chunk_store, - synchronizer=self._synchronizer) + chunk_store=self._chunk_store, synchronizer=self._synchronizer) else: raise KeyError(item) @@ -369,10 +368,8 @@ def groups(self): for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key if contains_group(self._store, path): - yield key, Group(self._store, path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer) + yield key, Group(self._store, path=path, read_only=self._read_only, + chunk_store=self._chunk_store, synchronizer=self._synchronizer) def array_keys(self): """Return an iterator over member names for arrays only. @@ -414,10 +411,8 @@ def arrays(self): for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key if contains_array(self._store, path): - yield key, Array(self._store, path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer) + yield key, Array(self._store, path=path, read_only=self._read_only, + chunk_store=self._chunk_store, synchronizer=self._synchronizer) def visitvalues(self, func): """Run ``func`` on each object. @@ -437,27 +432,20 @@ def visitvalues(self, func): >>> def print_visitor(obj): ... print(obj) >>> g1.visitvalues(print_visitor) - Group(/bar, 2) - groups: 2; baz, quux - store: DictStore - Group(/bar/baz, 0) - store: DictStore - Group(/bar/quux, 0) - store: DictStore - Group(/foo, 0) - store: DictStore + + + + >>> g3.visitvalues(print_visitor) - Group(/bar/baz, 0) - store: DictStore - Group(/bar/quux, 0) - store: DictStore + + """ def _visit(obj): yield obj - keys = sorted(getattr(obj, "keys", lambda : [])()) + keys = sorted(getattr(obj, "keys", lambda: [])()) for each_key in keys: for each_obj in _visit(obj[each_key]): yield each_obj @@ -522,20 +510,13 @@ def visititems(self, func): >>> def print_visitor(name, obj): ... 
print((name, obj)) >>> g1.visititems(print_visitor) - ('bar', Group(/bar, 2) - groups: 2; baz, quux - store: DictStore) - ('bar/baz', Group(/bar/baz, 0) - store: DictStore) - ('bar/quux', Group(/bar/quux, 0) - store: DictStore) - ('foo', Group(/foo, 0) - store: DictStore) + ('bar', ) + ('bar/baz', ) + ('bar/quux', ) + ('foo', ) >>> g3.visititems(print_visitor) - ('baz', Group(/bar/baz, 0) - store: DictStore) - ('quux', Group(/bar/quux, 0) - store: DictStore) + ('baz', ) + ('quux', ) """ @@ -587,12 +568,10 @@ def _create_group_nosync(self, name, overwrite=False): path = self._item_path(name) # create terminal group - init_group(self._store, path=path, chunk_store=self._chunk_store, - overwrite=overwrite) + init_group(self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite) return Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer) + chunk_store=self._chunk_store, synchronizer=self._synchronizer) def create_groups(self, *names, **kwargs): """Convenience method to create multiple groups in a single call.""" @@ -631,13 +610,11 @@ def _require_group_nosync(self, name, overwrite=False): # create terminal group if necessary if not contains_group(self._store, path): - init_group(store=self._store, path=path, - chunk_store=self._chunk_store, + init_group(store=self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite) return Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer) + chunk_store=self._chunk_store, synchronizer=self._synchronizer) def require_groups(self, *names): """Convenience method to require multiple groups in a single call.""" @@ -689,12 +666,13 @@ def create_dataset(self, name, **kwargs): >>> d1 = g1.create_dataset('foo', shape=(10000, 10000), ... 
chunks=(1000, 1000)) >>> d1 - Array(/foo, (10000, 10000), float64, chunks=(1000, 1000), order=C) - nbytes: 762.9M; nbytes_stored: ...; ratio: ...; initialized: 0/100 - compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - store: DictStore + + >>> d2 = g1.create_dataset('bar/baz/qux', shape=(100, 100, 100), + ... chunks=(100, 10, 10)) + >>> d2 + - """ # flake8: noqa + """ return self._write_op(self._create_dataset_nosync, name, **kwargs) @@ -707,12 +685,10 @@ def _create_dataset_nosync(self, name, data=None, **kwargs): # create array if data is None: - a = create(store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + a = create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) else: - a = array(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + a = array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) return a @@ -746,8 +722,8 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, synchronizer = kwargs.get('synchronizer', self._synchronizer) cache_metadata = kwargs.get('cache_metadata', True) a = Array(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, - synchronizer=synchronizer, cache_metadata=cache_metadata) + chunk_store=self._chunk_store, synchronizer=synchronizer, + cache_metadata=cache_metadata) shape = normalize_shape(shape) if shape != a.shape: raise TypeError('shapes do not match') @@ -772,8 +748,7 @@ def create(self, name, **kwargs): def _create_nosync(self, name, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return create(store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def empty(self, name, **kwargs): """Create an array. 
Keyword arguments as per @@ -783,8 +758,7 @@ def empty(self, name, **kwargs): def _empty_nosync(self, name, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return empty(store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return empty(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def zeros(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -794,8 +768,7 @@ def zeros(self, name, **kwargs): def _zeros_nosync(self, name, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return zeros(store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return zeros(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def ones(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -805,8 +778,7 @@ def ones(self, name, **kwargs): def _ones_nosync(self, name, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return ones(store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return ones(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def full(self, name, fill_value, **kwargs): """Create an array. 
Keyword arguments as per @@ -816,8 +788,7 @@ def full(self, name, fill_value, **kwargs): def _full_nosync(self, name, fill_value, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return full(store=self._store, path=path, - chunk_store=self._chunk_store, + return full(store=self._store, path=path, chunk_store=self._chunk_store, fill_value=fill_value, **kwargs) def array(self, name, data, **kwargs): @@ -828,8 +799,7 @@ def array(self, name, data, **kwargs): def _array_nosync(self, name, data, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return array(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def empty_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -839,8 +809,8 @@ def empty_like(self, name, data, **kwargs): def _empty_like_nosync(self, name, data, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return empty_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return empty_like(data, store=self._store, path=path, chunk_store=self._chunk_store, + **kwargs) def zeros_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -850,8 +820,8 @@ def zeros_like(self, name, data, **kwargs): def _zeros_like_nosync(self, name, data, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return zeros_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return zeros_like(data, store=self._store, path=path, chunk_store=self._chunk_store, + **kwargs) def ones_like(self, name, data, **kwargs): """Create an array. 
Keyword arguments as per @@ -861,8 +831,8 @@ def ones_like(self, name, data, **kwargs): def _ones_like_nosync(self, name, data, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return ones_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return ones_like(data, store=self._store, path=path, chunk_store=self._chunk_store, + **kwargs) def full_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -872,8 +842,8 @@ def full_like(self, name, data, **kwargs): def _full_like_nosync(self, name, data, **kwargs): path = self._item_path(name) kwargs.setdefault('synchronizer', self._synchronizer) - return full_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + return full_like(data, store=self._store, path=path, chunk_store=self._chunk_store, + **kwargs) def _handle_store_arg(store): @@ -916,16 +886,14 @@ def group(store=None, overwrite=False, chunk_store=None, synchronizer=None, >>> import zarr >>> g = zarr.group() >>> g - Group(/, 0) - store: DictStore + Create a group with a different store:: >>> store = zarr.DirectoryStore('example') >>> g = zarr.group(store=store, overwrite=True) >>> g - Group(/, 0) - store: DirectoryStore + """ @@ -970,14 +938,10 @@ def open_group(store=None, mode='a', synchronizer=None, path=None): >>> foo = root.create_group('foo') >>> bar = root.create_group('bar') >>> root - Group(/, 2) - groups: 2; bar, foo - store: DirectoryStore + >>> root2 = zarr.open_group('example', mode='a') >>> root2 - Group(/, 2) - groups: 2; bar, foo - store: DirectoryStore + >>> root == root2 True diff --git a/zarr/storage.py b/zarr/storage.py index 38c6f883e4..782361bae1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -132,8 +132,7 @@ def _require_parent_group(path, store, chunk_store, overwrite): for i in range(len(segments)): p = '/'.join(segments[:i]) if contains_array(store, p): - _init_group_metadata(store, path=p, 
chunk_store=chunk_store, - overwrite=overwrite) + _init_group_metadata(store, path=p, chunk_store=chunk_store, overwrite=overwrite) elif not contains_group(store, p): _init_group_metadata(store, path=p, chunk_store=chunk_store) @@ -251,8 +250,7 @@ def init_array(store, shape, chunks=None, dtype=None, compressor='default', path = normalize_storage_path(path) # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, @@ -260,16 +258,15 @@ def init_array(store, shape, chunks=None, dtype=None, compressor='default', chunk_store=chunk_store, filters=filters) -def _init_array_metadata(store, shape, chunks=None, dtype=None, - compressor='default', - fill_value=None, order='C', overwrite=False, - path=None, chunk_store=None, filters=None): +def _init_array_metadata(store, shape, chunks=None, dtype=None, compressor='default', + fill_value=None, order='C', overwrite=False, path=None, + chunk_store=None, filters=None): # guard conditions if overwrite: # attempt to delete any pre-existing items in store rmdir(store, path) - if chunk_store is not None and chunk_store != store: + if chunk_store is not None: rmdir(chunk_store, path) elif contains_array(store, path): err_contains_array(path) @@ -339,12 +336,10 @@ def init_group(store, overwrite=False, path=None, chunk_store=None): path = normalize_storage_path(path) # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) # initialise metadata - _init_group_metadata(store=store, overwrite=overwrite, path=path, - chunk_store=chunk_store) + _init_group_metadata(store=store, overwrite=overwrite, path=path, 
chunk_store=chunk_store) def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None): @@ -353,7 +348,7 @@ def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None): if overwrite: # attempt to delete any pre-existing items in store rmdir(store, path) - if chunk_store is not None and chunk_store != store: + if chunk_store is not None: rmdir(chunk_store, path) elif contains_array(store, path): err_contains_array(path) @@ -420,7 +415,7 @@ class DictStore(MutableMapping): >>> sorted(store.keys()) ['foo'] - """ # flake8: noqa + """ def __init__(self, cls=dict): self.root = cls() @@ -599,7 +594,7 @@ class DirectoryStore(MutableMapping): >>> os.path.exists('example_store/a') False - """ # flake8: noqa + """ def __init__(self, path): diff --git a/zarr/sync.py b/zarr/sync.py index d00b157b55..fe936e13e2 100644 --- a/zarr/sync.py +++ b/zarr/sync.py @@ -38,7 +38,7 @@ class ProcessSynchronizer(object): Path to a directory on a file system that is shared by all processes. N.B., this should be a *different* path to where you store the array. 
- """ # flake8: noqa + """ def __init__(self, path): self.path = path diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index fe0b82fab1..e03deb73a9 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -160,7 +160,6 @@ def test_array_1d_set_scalar(self): assert_array_equal(a, z[:]) for value in -1, 0, 1, 10: - print(value) a[15:35] = value z[15:35] = value assert_array_equal(a, z[:]) diff --git a/zarr/tests/test_filters.py b/zarr/tests/test_filters.py index 12ad4e43dd..6da5391eaa 100644 --- a/zarr/tests/test_filters.py +++ b/zarr/tests/test_filters.py @@ -34,7 +34,7 @@ def test_array_with_delta_filter(): data = np.arange(100, dtype=dtype) for compressor in compressors: - print(repr(compressor)) + # print(repr(compressor)) a = array(data, chunks=10, compressor=compressor, filters=filters) @@ -65,7 +65,7 @@ def test_array_with_astype_filter(): data = np.arange(shape, dtype=decode_dtype) for compressor in compressors: - print(repr(compressor)) + # print(repr(compressor)) a = array(data, chunks=chunks, compressor=compressor, filters=filters) @@ -95,7 +95,7 @@ def test_array_with_scaleoffset_filter(): data = np.linspace(1000, 1001, 34, dtype='f8') for compressor in compressors: - print(repr(compressor)) + # print(repr(compressor)) a = array(data, chunks=5, compressor=compressor, filters=filters) @@ -124,7 +124,7 @@ def test_array_with_quantize_filter(): data = np.linspace(0, 1, 34, dtype=dtype) for compressor in compressors: - print(repr(compressor)) + # print(repr(compressor)) a = array(data, chunks=5, compressor=compressor, filters=filters) @@ -151,7 +151,7 @@ def test_array_with_packbits_filter(): data = np.random.randint(0, 2, size=100, dtype=bool) for compressor in compressors: - print(repr(compressor)) + # print(repr(compressor)) a = array(data, chunks=5, compressor=compressor, filters=filters) @@ -178,7 +178,7 @@ def test_array_with_categorize_filter(): filters = [flt] for compressor in compressors: - print(repr(compressor)) + # 
print(repr(compressor)) a = array(data, chunks=5, compressor=compressor, filters=filters) @@ -202,7 +202,7 @@ def test_compressor_as_filter(): if compressor is None: # skip continue - print(repr(compressor)) + # print(repr(compressor)) # setup filters dtype = 'i8' diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index a2784cfd18..2f34d0e3dc 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -21,7 +21,7 @@ from zarr.attrs import Attributes from zarr.errors import PermissionError from zarr.creation import open_array -from zarr.compat import PY2 +from zarr.util import InfoReporter from numcodecs import Zlib @@ -47,10 +47,17 @@ def test_group_init_1(self): store, chunk_store = self.create_store() g = self.create_group(store, chunk_store=chunk_store) assert_is(store, g.store) + if chunk_store is None: + assert_is(store, g.chunk_store) + else: + assert_is(chunk_store, g.chunk_store) assert_false(g.read_only) eq('', g.path) eq('/', g.name) assert_is_instance(g.attrs, Attributes) + assert_is_instance(g.info, InfoReporter) + assert_is_instance(repr(g.info), str) + assert_is_instance(g.info._repr_html_(), str) def test_group_init_2(self): store, chunk_store = self.create_store() @@ -106,6 +113,23 @@ def test_create_group(self): eq('a/b/c', g5.path) eq('/a/b/c', g5.name) + # test non-str keys + class Foo(object): + + def __init__(self, s): + self.s = s + + def __str__(self): + return self.s + + o = Foo('test/object') + go = g1.create_group(o) + assert_is_instance(go, Group) + eq('test/object', go.path) + go = g1.create_group(b'test/bytes') + assert_is_instance(go, Group) + eq('test/bytes', go.path) + # test bad keys with assert_raises(KeyError): g1.create_group('foo') # already exists @@ -608,27 +632,6 @@ def test_getattr(self): # test that hasattr returns False instead of an exception (issue #88) assert_false(hasattr(g1, 'unexistingattribute')) - def test_group_repr(self): - g = self.create_group() - expect = 'Group(/, 
0)\n store: dict' - actual = repr(g) - eq(expect, actual) - g.create_group('foo') - g.create_group('bar') - g.create_group('y'*80) - g.create_dataset('baz', shape=100, chunks=10) - g.create_dataset('quux', shape=100, chunks=10) - g.create_dataset('z'*80, shape=100, chunks=10) - expect = \ - 'Group(/, 6)\n' \ - ' arrays: 3; baz, quux, ' \ - 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz...\n' \ - ' groups: 3; bar, foo, ' \ - 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy...\n' \ - ' store: dict' - actual = repr(g) - eq(expect, actual) - def test_setitem(self): g = self.create_group() try: @@ -775,13 +778,6 @@ class TestGroupWithDictStore(TestGroup): def create_store(): return DictStore(), None - def test_group_repr(self): - g = self.create_group() - expect = 'Group(/, 0)\n store: DictStore' - actual = repr(g) - for l1, l2 in zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - def rmtree(p, f=shutil.rmtree, g=os.path.isdir): # pragma: no cover """Version of rmtree that will work atexit and only remove if directory.""" @@ -798,14 +794,6 @@ def create_store(): store = DirectoryStore(path) return store, None - def test_group_repr(self): - g = self.create_group() - expect = 'Group(/, 0)\n' \ - ' store: DirectoryStore' - actual = repr(g) - for l1, l2 in zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - class TestGroupWithZipStore(TestGroup): @@ -816,14 +804,6 @@ def create_store(): store = ZipStore(path) return store, None - def test_group_repr(self): - g = self.create_group() - expect = 'Group(/, 0)\n' \ - ' store: ZipStore' - actual = repr(g) - for l1, l2 in zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - class TestGroupWithChunkStore(TestGroup): @@ -831,15 +811,6 @@ class TestGroupWithChunkStore(TestGroup): def create_store(): return dict(), dict() - def test_group_repr(self): - if not PY2: - g = self.create_group() - expect = 'Group(/, 0)\n' \ - ' store: dict; chunk_store: dict' - actual = repr(g) - for l1, l2 in 
zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - def test_chunk_store(self): # setup store, chunk_store = self.create_store() diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py new file mode 100644 index 0000000000..497f0e58b9 --- /dev/null +++ b/zarr/tests/test_info.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function, division + + +from nose.tools import assert_list_equal + + +import zarr +import numcodecs + + +def test_info(): + + # setup + g = zarr.group(store=dict(), chunk_store=dict(), + synchronizer=zarr.ThreadSynchronizer()) + g.create_group('foo') + z = g.zeros('bar', shape=10, filters=[numcodecs.Adler32()]) + + # test group info + items = g.info_items() + keys = sorted([k for k, _ in items]) + expected_keys = sorted([ + 'Type', 'Read-only', 'Synchronizer type', 'Store type', 'Chunk store type', + 'No. members', 'No. arrays', 'No. groups', 'Arrays', 'Groups', 'Name' + ]) + assert_list_equal(expected_keys, keys) + + # test array info + items = z.info_items() + keys = sorted([k for k, _ in items]) + expected_keys = sorted([ + 'Type', 'Data type', 'Shape', 'Chunk shape', 'Order', 'Read-only', 'Filter [0]', + 'Compressor', 'Synchronizer type', 'Store type', 'Chunk store type', 'No. bytes', + 'No. 
bytes stored', 'Storage ratio', 'Chunks initialized', 'Name' + ]) + assert_list_equal(expected_keys, keys) diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index 6efb3e592a..fadcd73e6f 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -8,8 +8,6 @@ from multiprocessing import Pool as ProcessPool from multiprocessing import cpu_count import tempfile -import traceback -import sys import numpy as np @@ -24,8 +22,6 @@ from zarr.core import Array from zarr.attrs import Attributes from zarr.storage import init_array, DirectoryStore, init_group, atexit_rmtree -from zarr.compat import PY2 -from zarr.codecs import Zlib from zarr.hierarchy import Group @@ -82,7 +78,6 @@ def test_parallel_setitem(self): results = pool.map(_set_arange, zip([arr] * n, range(n)), chunksize=1) results = sorted(results) - print(results) eq(list(range(n)), results) assert_array_equal(np.arange(n * 1000), arr[:]) @@ -100,7 +95,6 @@ def test_parallel_append(self): results = pool.map(_append, zip([arr] * n, range(n)), chunksize=1) results = sorted(results) - print(results) eq([((i+2)*1000,) for i in range(n)], results) eq(((n+1)*1000,), arr.shape) @@ -115,21 +109,6 @@ def create_array(self, read_only=False, **kwargs): return Array(store, synchronizer=ThreadSynchronizer(), read_only=read_only) - def test_repr(self): - if not PY2: - - z = self.create_array(shape=100, chunks=10, dtype='f4', - compressor=Zlib(1)) - # flake8: noqa - expect = """Array((100,), float32, chunks=(10,), order=C) - nbytes: 400; nbytes_stored: 245; ratio: 1.6; initialized: 0/10 - compressor: Zlib(level=1) - store: dict; synchronizer: ThreadSynchronizer -""" - actual = repr(z) - for l1, l2 in zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - def create_pool(self): pool = ThreadPool(cpu_count()) return pool @@ -148,21 +127,6 @@ def create_array(self, read_only=False, **kwargs): return Array(store, synchronizer=synchronizer, read_only=read_only, cache_metadata=False) - def test_repr(self): - 
if not PY2: - - z = self.create_array(shape=100, chunks=10, dtype='f4', - compressor=Zlib(1)) - # flake8: noqa - expect = """Array((100,), float32, chunks=(10,), order=C) - nbytes: 400; nbytes_stored: 245; ratio: 1.6; initialized: 0/10 - compressor: Zlib(level=1) - store: DirectoryStore; synchronizer: ProcessSynchronizer -""" - actual = repr(z) - for l1, l2 in zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - def create_pool(self): pool = ProcessPool(processes=cpu_count()) return pool @@ -199,7 +163,6 @@ def test_parallel_create_group(self): pool.close() pool.terminate() - print(results) eq(n, len(g)) pool.terminate() @@ -221,7 +184,6 @@ def test_parallel_require_group(self): pool.close() pool.terminate() - print(results) eq(n//10, len(g)) pool.terminate() @@ -243,15 +205,6 @@ def create_pool(self): pool = ThreadPool(cpu_count()) return pool - def test_group_repr(self): - if not PY2: - g = self.create_group() - expect = 'Group(/, 0)\n' \ - ' store: dict; synchronizer: ThreadSynchronizer' - actual = repr(g) - for l1, l2 in zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - def test_synchronizer_property(self): g = self.create_group() assert_is_instance(g.synchronizer, ThreadSynchronizer) @@ -281,15 +234,6 @@ def create_pool(self): pool = ProcessPool(processes=cpu_count()) return pool - def test_group_repr(self): - if not PY2: - g = self.create_group() - expect = 'Group(/, 0)\n' \ - ' store: DirectoryStore; synchronizer: ProcessSynchronizer' - actual = repr(g) - for l1, l2 in zip(expect.split('\n'), actual.split('\n')): - eq(l1, l2) - def test_synchronizer_property(self): g = self.create_group() assert_is_instance(g.synchronizer, ProcessSynchronizer) diff --git a/zarr/tests/test_util.py b/zarr/tests/test_util.py index fe4d7aaf05..7929ee142d 100644 --- a/zarr/tests/test_util.py +++ b/zarr/tests/test_util.py @@ -5,10 +5,9 @@ from nose.tools import eq_ as eq, assert_raises, assert_true, assert_false, \ assert_is_instance - -from zarr.util import 
normalize_shape, normalize_chunks, is_total_slice, \ - normalize_axis_selection, normalize_array_selection, \ - normalize_resize_args, human_readable_size, normalize_order, guess_chunks +from zarr.util import normalize_shape, normalize_chunks, is_total_slice, normalize_axis_selection, \ + normalize_array_selection, normalize_resize_args, human_readable_size, normalize_order, \ + guess_chunks, info_html_report, info_text_report def test_normalize_shape(): @@ -194,3 +193,16 @@ def test_guess_chunks(): chunks = guess_chunks((1000000,), 40000000) assert_is_instance(chunks, tuple) eq((1,), chunks) + + +def test_info_text_report(): + items = [('foo', 'bar'), ('baz', 'qux')] + expect = "foo : bar\nbaz : qux\n" + eq(expect, info_text_report(items)) + + +def test_info_html_report(): + items = [('foo', 'bar'), ('baz', 'qux')] + actual = info_html_report(items) + eq('', actual[-8:]) diff --git a/zarr/util.py b/zarr/util.py index 8419d06ae2..a3311a4854 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division import operator +from textwrap import TextWrapper import numpy as np @@ -249,6 +250,15 @@ def normalize_order(order): def normalize_storage_path(path): + + # handle bytes + if not PY2 and isinstance(path, bytes): + path = str(path, 'ascii') + + # ensure str + if path is not None and not isinstance(path, str): + path = str(path) + if path: # convert backslash to forward slash @@ -293,3 +303,44 @@ def buffer_size(v): else: v = memoryview(v) return reduce(operator.mul, v.shape) * v.itemsize + + +def info_text_report(items): + keys = [k for k, v in items] + max_key_len = max(len(k) for k in keys) + report = '' + for k, v in items: + wrapper = TextWrapper(width=80, + initial_indent=k.ljust(max_key_len) + ' : ', + subsequent_indent=' '*max_key_len + ' : ') + text = wrapper.fill(str(v)) + report += text + '\n' + return report + + +def info_html_report(items): + report = '' + report += '' 
+ for k, v in items: + report += '' \ + '' \ + '' \ + '' \ + % (k, v) + report += '' + report += '
%s%s
' + return report + + +class InfoReporter(object): + + def __init__(self, obj): + self.obj = obj + + def __repr__(self): + items = self.obj.info_items() + return info_text_report(items) + + def _repr_html_(self): + items = self.obj.info_items() + return info_html_report(items)