Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions docs/api/storage.rst
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
Storage (``zarr.storage``)
==========================
.. module:: zarr.storage

This module contains storage classes for use with Zarr arrays and groups.
However, note that any object implementing the ``MutableMapping`` interface
can be used as a Zarr array store.
.. automodule:: zarr.storage

.. autofunction:: init_array
.. autofunction:: init_group

.. autoclass:: DictStore
.. autoclass:: DirectoryStore
.. autoclass:: TempStore
.. autoclass:: NestedDirectoryStore
.. autoclass:: ZipStore

.. automethod:: close
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ setenv =
py34,py35,py36: PY_MAJOR_VERSION = py3
py27: PY_MAJOR_VERSION = py2
commands =
python -c 'import glob; import shutil; import os; [(shutil.rmtree(d) if os.path.isdir(d) else os.remove(d) if os.path.isfile(d) else None) for d in glob.glob("./example*")]'
py27,py34,py35: nosetests -v --with-coverage --cover-erase --cover-package=zarr zarr
py36: nosetests -v --with-coverage --cover-erase --cover-package=zarr --with-doctest --doctest-options=+NORMALIZE_WHITESPACE,+ELLIPSIS zarr
py36: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst
Expand Down
6 changes: 3 additions & 3 deletions zarr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@


from zarr.core import Array
from zarr.creation import empty, zeros, ones, full, array, empty_like, \
zeros_like, ones_like, full_like, open, open_array, open_like, create
from zarr.storage import DictStore, DirectoryStore, ZipStore, TempStore
from zarr.creation import empty, zeros, ones, full, array, empty_like, zeros_like, ones_like, \
full_like, open, open_array, open_like, create
from zarr.storage import DictStore, DirectoryStore, ZipStore, TempStore, NestedDirectoryStore
from zarr.hierarchy import group, open_group, Group
from zarr.sync import ThreadSynchronizer, ProcessSynchronizer
from zarr.codecs import *
Expand Down
132 changes: 129 additions & 3 deletions zarr/storage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# -*- coding: utf-8 -*-
"""
This module contains storage classes for use with Zarr arrays and groups. Note that any object
implementing the ``MutableMapping`` interface can be used as a Zarr array store.

"""
from __future__ import absolute_import, print_function, division
from collections import MutableMapping
import os
Expand All @@ -7,6 +12,7 @@
import zipfile
import shutil
import atexit
import re


import numpy as np
Expand All @@ -26,6 +32,7 @@
group_meta_key = '.zgroup'
attrs_key = '.zattrs'
try:
# noinspection PyUnresolvedReferences
from zarr.codecs import Blosc
default_compressor = Blosc()
except ImportError: # pragma: no cover
Expand Down Expand Up @@ -376,8 +383,10 @@ def ensure_bytes(s):
return s
if isinstance(s, np.ndarray):
if PY2: # pragma: py3 no cover
# noinspection PyArgumentList
return s.tostring(order='Any')
else: # pragma: py2 no cover
# noinspection PyArgumentList
return s.tobytes(order='Any')
if hasattr(s, 'tobytes'):
return s.tobytes()
Expand Down Expand Up @@ -535,14 +544,13 @@ def getsize(self, path=None):
path = normalize_storage_path(path)

# obtain value to return size of
value = self.root
if path:
try:
parent, key = self._get_parent(path)
value = parent[key]
except KeyError:
err_path_not_found(path)
else:
value = self.root

# obtain size of value
if isinstance(value, self.cls):
Expand Down Expand Up @@ -691,11 +699,15 @@ def __iter__(self):
def __len__(self):
return sum(1 for _ in self.keys())

def listdir(self, path=None):
def dir_path(self, path=None):
    """Return the file system directory corresponding to a storage path.

    Parameters
    ----------
    path : string, optional
        Storage path; it is passed through ``normalize_storage_path`` first.

    Returns
    -------
    string
        ``self.path`` when the normalized storage path is empty, otherwise
        ``self.path`` joined with the normalized storage path.

    """
    relative = normalize_storage_path(path)
    if not relative:
        # nothing to append, the store's own directory is the answer
        return self.path
    return os.path.join(self.path, relative)

def listdir(self, path=None):
dir_path = self.dir_path(path)
if os.path.isdir(dir_path):
return sorted(os.listdir(dir_path))
else:
Expand Down Expand Up @@ -739,12 +751,126 @@ def atexit_rmtree(path,
class TempStore(DirectoryStore):
    """Directory store using a temporary directory for storage.

    Parameters
    ----------
    suffix : string, optional
        Forwarded to ``tempfile.mkdtemp`` as ``suffix``.
    prefix : string, optional
        Forwarded to ``tempfile.mkdtemp`` as ``prefix``.
    dir : string, optional
        Forwarded to ``tempfile.mkdtemp`` as ``dir``.

    Notes
    -----
    The directory is created when the store is instantiated, and
    ``atexit_rmtree`` is registered via ``atexit`` to be called with the
    directory path when the interpreter exits.

    """

    # noinspection PyShadowingBuiltins
    def __init__(self, suffix='', prefix='zarr', dir=None):
        tmp_path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
        # schedule clean-up before handing the directory to the parent class
        atexit.register(atexit_rmtree, tmp_path)
        super(TempStore, self).__init__(tmp_path)


# matches a multidimensional chunk key such as "0.0" or "2.1.12"
_prog_ckey = re.compile(r'^(\d+)(\.\d+)+$')
# matches a purely numeric name such as "0" or "42"
_prog_number = re.compile(r'^\d+$')


def _map_ckey(key):
    """Map a store key to its nested form.

    Only the last '/'-separated segment of `key` is examined. If it looks like
    a multidimensional chunk key (digits separated by '.'), the '.' characters
    in that segment are replaced with '/'. Any other key is returned unchanged.

    """
    parent, sep, leaf = key.rpartition('/')
    if _prog_ckey.match(leaf):
        leaf = leaf.replace('.', '/')
    # parent and sep are both empty when the key contains no '/'
    return parent + sep + leaf


class NestedDirectoryStore(DirectoryStore):
    """Mutable Mapping interface to a directory, with special handling for chunk keys so that
    chunk files for multidimensional arrays are stored in a nested directory tree. Keys must be
    strings, values must be bytes-like objects.

    Parameters
    ----------
    path : string
        Location of directory.

    Examples
    --------
    Most keys are mapped to file paths as normal, e.g.::

        >>> import zarr
        >>> store = zarr.NestedDirectoryStore('example_nested_store')
        >>> store['foo'] = b'bar'
        >>> store['foo']
        b'bar'
        >>> store['a/b/c'] = b'xxx'
        >>> store['a/b/c']
        b'xxx'
        >>> open('example_nested_store/foo', 'rb').read()
        b'bar'
        >>> open('example_nested_store/a/b/c', 'rb').read()
        b'xxx'

    Chunk keys are handled in a special way, such that the '.' characters in the key are mapped to
    directory path separators internally. E.g.::

        >>> store['bar/0.0'] = b'yyy'
        >>> store['bar/0.0']
        b'yyy'
        >>> store['baz/2.1.12'] = b'zzz'
        >>> store['baz/2.1.12']
        b'zzz'
        >>> open('example_nested_store/bar/0/0', 'rb').read()
        b'yyy'
        >>> open('example_nested_store/baz/2/1/12', 'rb').read()
        b'zzz'

    Notes
    -----
    The standard DirectoryStore class stores all chunk files for an array together in a single
    directory. On some file systems the potentially large number of files in a single directory
    can cause performance issues. The NestedDirectoryStore class provides an alternative where
    chunk files for multidimensional arrays will be organised into a directory hierarchy,
    thus reducing the number of files in any one directory.

    """

    def __init__(self, path):
        super(NestedDirectoryStore, self).__init__(path)

    # The item access methods below translate the key with _map_ckey and then
    # defer to the parent class implementation.

    def __getitem__(self, key):
        key = _map_ckey(key)
        return super(NestedDirectoryStore, self).__getitem__(key)

    def __setitem__(self, key, value):
        key = _map_ckey(key)
        super(NestedDirectoryStore, self).__setitem__(key, value)

    def __delitem__(self, key):
        key = _map_ckey(key)
        super(NestedDirectoryStore, self).__delitem__(key)

    def __contains__(self, key):
        key = _map_ckey(key)
        return super(NestedDirectoryStore, self).__contains__(key)

    def __eq__(self, other):
        # equal only to another NestedDirectoryStore with the same path
        return (
            isinstance(other, NestedDirectoryStore) and
            self.path == other.path
        )

    def listdir(self, path=None):
        """Return a sorted list of the names found under the given storage path.

        If the listing contains the array metadata key, any child directory
        with a purely numeric name is walked recursively and every file found
        beneath it is reported as a '.'-separated key (e.g. the file ``2/1/12``
        is reported as ``'2.1.12'``). Otherwise the listing from the parent
        class is returned as is.

        """
        children = super(NestedDirectoryStore, self).listdir(path=path)
        if array_meta_key not in children:
            return children

        # special handling of directories containing an array to map nested chunk keys back
        # to standard chunk keys
        root_path = self.dir_path(path)
        new_children = []
        for entry in children:
            entry_path = os.path.join(root_path, entry)
            if _prog_number.match(entry) and os.path.isdir(entry_path):
                for dir_path, _, file_names in os.walk(entry_path):
                    for file_name in file_names:
                        file_path = os.path.join(dir_path, file_name)
                        # relpath is used rather than splitting on the root prefix, which
                        # gives the wrong piece if the prefix text recurs inside the path
                        # and fails if the root carries a trailing separator
                        rel_path = os.path.relpath(file_path, root_path)
                        new_children.append(rel_path.replace(os.path.sep, '.'))
            else:
                new_children.append(entry)
        return sorted(new_children)


# noinspection PyPep8Naming
class ZipStore(MutableMapping):
"""Mutable Mapping interface to a Zip file. Keys must be strings,
Expand Down
14 changes: 13 additions & 1 deletion zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
assert_raises, assert_true, assert_false, assert_is, assert_is_none


from zarr.storage import DirectoryStore, init_array, init_group
from zarr.storage import DirectoryStore, init_array, init_group, NestedDirectoryStore
from zarr.core import Array
from zarr.errors import PermissionError
from zarr.compat import PY2
Expand Down Expand Up @@ -713,6 +713,18 @@ def test_nbytes_stored(self):
eq(expect_nbytes_stored, z.nbytes_stored)


class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore):
    """Variant of TestArrayWithDirectoryStore whose arrays live in a NestedDirectoryStore."""

    @staticmethod
    def create_array(read_only=False, **kwargs):
        # each array gets its own temporary directory, removed at interpreter exit
        tmp_dir = mkdtemp()
        atexit.register(shutil.rmtree, tmp_dir)
        nested_store = NestedDirectoryStore(tmp_dir)
        # use Zlib(1) unless the caller supplied a compressor
        kwargs.setdefault('compressor', Zlib(1))
        init_array(nested_store, **kwargs)
        return Array(nested_store, read_only=read_only)


class TestArrayWithNoCompressor(TestArray):

def create_array(self, read_only=False, **kwargs):
Expand Down
12 changes: 11 additions & 1 deletion zarr/tests/test_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


from zarr.storage import DictStore, DirectoryStore, ZipStore, init_group, \
init_array, attrs_key, array_meta_key, group_meta_key, atexit_rmtree
init_array, attrs_key, array_meta_key, group_meta_key, atexit_rmtree, NestedDirectoryStore
from zarr.core import Array
from zarr.hierarchy import Group, group, open_group
from zarr.attrs import Attributes
Expand Down Expand Up @@ -791,6 +791,16 @@ def create_store():
return store, None


class TestGroupWithNestedDirectoryStore(TestGroup):
    """Variant of TestGroup that supplies a NestedDirectoryStore."""

    @staticmethod
    def create_store():
        # temporary directory is cleaned up via atexit_rmtree at interpreter exit
        tmp_dir = tempfile.mkdtemp()
        atexit.register(atexit_rmtree, tmp_dir)
        # second element of the returned pair is None
        return NestedDirectoryStore(tmp_dir), None


class TestGroupWithZipStore(TestGroup):

@staticmethod
Expand Down
24 changes: 22 additions & 2 deletions zarr/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@

from zarr.storage import init_array, array_meta_key, attrs_key, DictStore, \
DirectoryStore, ZipStore, init_group, group_meta_key, getsize, \
migrate_1to2, TempStore, atexit_rmtree
migrate_1to2, TempStore, atexit_rmtree, NestedDirectoryStore, default_compressor
from zarr.meta import decode_array_metadata, encode_array_metadata, \
ZARR_FORMAT, decode_group_metadata, encode_group_metadata
from zarr.compat import text_type
from zarr.storage import default_compressor
from zarr.codecs import Zlib, Blosc, BZ2
from zarr.errors import PermissionError
from zarr.hierarchy import group
Expand Down Expand Up @@ -616,6 +615,27 @@ def test_setdel(self):
setdel_hierarchy_checks(store)


class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase):
    """Tests inherited from TestDirectoryStore plus a chunk key nesting check,
    with ``create_store`` returning a NestedDirectoryStore."""

    def create_store(self):
        # temporary directory is cleaned up via atexit_rmtree at interpreter exit
        tmp_dir = tempfile.mkdtemp()
        atexit.register(atexit_rmtree, tmp_dir)
        return NestedDirectoryStore(tmp_dir)

    def test_chunk_nesting(self):
        store = self.create_store()
        # any path where last segment looks like a chunk key gets special handling
        # each case: (key to write, value, keys the value must be readable under)
        cases = (
            ('0.0', b'xxx', ('0.0', '0/0')),
            ('foo/10.20.30', b'yyy', ('foo/10.20.30', 'foo/10/20/30')),
            ('42', b'zzz', ('42',)),
        )
        for key, value, lookups in cases:
            store[key] = value
            for lookup in lookups:
                eq(value, store[lookup])


class TestTempStore(StoreTests, unittest.TestCase):

def create_store(self):
Expand Down