Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
021bd44
add store routines for getting bytes and json
d-v-b Jan 4, 2026
61cf6d0
Merge branch 'main' into feat/get_json
d-v-b Jan 4, 2026
7d26b8e
check for FileNotFoundError when a key is missing
d-v-b Jan 4, 2026
971c3e4
remove storepath methods
d-v-b Jan 4, 2026
b7f7e38
Merge branch 'feat/get_json' of https://github.com/d-v-b/zarr-python …
d-v-b Jan 4, 2026
d70a5e5
changelog
d-v-b Jan 4, 2026
a213058
rename methods
d-v-b Jan 8, 2026
38ff517
continue renaming / test refactoring
d-v-b Jan 8, 2026
bdc4ef8
refactor new test functions
d-v-b Jan 8, 2026
e8ca484
Merge branch 'main' into feat/get_json
d-v-b Jan 8, 2026
0a97eb4
Merge branch 'main' into feat/get_json
d-v-b Jan 8, 2026
b110768
add BufferLike as buffer parameter for store methods that allocate me…
d-v-b Jan 8, 2026
6b9de9d
implement default on store abc
d-v-b Jan 8, 2026
a3283a9
Merge branch 'main' into feat/default-buffer
d-v-b Jan 8, 2026
281538a
consolidate prototype testing
d-v-b Jan 8, 2026
68a449e
Merge branch 'main' into feat/default-buffer
d-v-b Jan 9, 2026
6bba68f
Merge branch 'main' into feat/default-buffer
d-v-b Jan 12, 2026
a76d984
Merge branch 'main' into feat/default-buffer
d-v-b Jan 16, 2026
b205b35
Merge branch 'main' into feat/default-buffer
d-v-b Jan 16, 2026
44c5882
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Jan 26, 2026
6904239
remove as much as possible default_buffer_prototype() invocation
d-v-b Jan 26, 2026
3299e07
Merge branch 'main' into feat/default-buffer
d-v-b Jan 26, 2026
1f0322f
remove incorrect release note and add one for the changes here
d-v-b Jan 26, 2026
c8061bf
fix mysterious linting errors
d-v-b Jan 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changes/3644.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The `Store.get` and `Store.get_partial_values` methods now accept `None` as the `prototype` argument. When `prototype` is `None`, stores will use their default buffer class (typically `zarr.core.buffer.cpu.Buffer`). This simplifies the API for common use cases where the default buffer is sufficient.

A new type alias `BufferClassLike` has been added, which accepts either a `Buffer` class or a `BufferPrototype` instance.

**Breaking change for third-party store implementations:** If you have implemented a custom `Store` subclass, you must update your `get` and `get_partial_values` methods to handle `prototype=None`. To do this, override the `_get_default_buffer_class` method to return an appropriate default `Buffer` class, and update your method signatures to accept `BufferClassLike | None` instead of `BufferPrototype`. When `prototype` is `None`, call `self._get_default_buffer_class()` to obtain the buffer class. If `prototype` is a `BufferPrototype` instance, extract the buffer class via `prototype.buffer`.
103 changes: 75 additions & 28 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@
from itertools import starmap
from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable

from zarr.core.buffer import Buffer, BufferPrototype
from zarr.core.sync import sync
from zarr.registry import get_buffer_class

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, AsyncIterator, Iterable
from types import TracebackType
from typing import Any, Self, TypeAlias

from zarr.core.buffer import Buffer, BufferPrototype
__all__ = ["BufferClassLike", "ByteGetter", "ByteSetter", "Store", "set_or_delete"]

__all__ = ["ByteGetter", "ByteSetter", "Store", "set_or_delete"]
BufferClassLike = type[Buffer] | BufferPrototype
"""An object that is or contains a Buffer class"""


@dataclass
Expand Down Expand Up @@ -183,20 +186,30 @@ def __eq__(self, value: object) -> bool:
"""Equality comparison."""
...

def _get_default_buffer_class(self) -> type[Buffer]:
    """
    Return the ``Buffer`` class this store uses when the caller supplies none.

    This implementation simply returns whatever ``get_buffer_class()``
    provides.

    Returns
    -------
    type[Buffer]
        The default buffer class.
    """
    default_cls = get_buffer_class()
    return default_cls

@abstractmethod
async def get(
self,
key: str,
prototype: BufferPrototype,
prototype: BufferClassLike | None = None,
byte_range: ByteRequest | None = None,
) -> Buffer | None:
"""Retrieve the value associated with a given key.
Parameters
----------
key : str
prototype : BufferPrototype
The prototype of the output buffer. Stores may support a default buffer prototype.
prototype : BufferClassLike | None, optional
The prototype of the output buffer.
Can be either a Buffer class or an instance of `BufferPrototype`, in which case its
`buffer` attribute will be used.
If `None`, the default buffer class for this store will be retrieved via the
``_get_default_buffer_class`` method.
byte_range : ByteRequest, optional
ByteRequest may be one of the following. If not provided, all data associated with the key is retrieved.
- RangeByteRequest(int, int): Request a specific range of bytes in the form (start, end). The end is exclusive. If the given range is zero-length or starts after the end of the object, an error will be returned. Additionally, if the range ends after the end of the object, the entire remainder of the object will be returned. Otherwise, the exact requested range will be returned.
Expand All @@ -210,7 +223,11 @@ async def get(
...

async def _get_bytes(
self, key: str, *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
self,
key: str,
*,
prototype: BufferClassLike | None = None,
byte_range: ByteRequest | None = None,
) -> bytes:
"""
Retrieve raw bytes from the store asynchronously.
Expand All @@ -222,8 +239,12 @@ async def _get_bytes(
----------
key : str
The key identifying the data to retrieve.
prototype : BufferPrototype
The buffer prototype to use for reading the data.
prototype : BufferClassLike | None, optional
The prototype of the output buffer.
Can be either a Buffer class or an instance of `BufferPrototype`, in which case its
`buffer` attribute will be used.
If `None`, the default buffer class for this store will be retrieved via the
``_get_default_buffer_class`` method.
byte_range : ByteRequest, optional
If specified, only retrieve a portion of the stored data.
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
Expand All @@ -248,7 +269,7 @@ async def _get_bytes(
--------
>>> store = await MemoryStore.open()
>>> await store.set("data", Buffer.from_bytes(b"hello world"))
>>> data = await store.get_bytes("data", prototype=default_buffer_prototype())
>>> data = await store._get_bytes("data")
>>> print(data)
b'hello world'
"""
Expand All @@ -258,7 +279,11 @@ async def _get_bytes(
return buffer.to_bytes()

def _get_bytes_sync(
self, key: str = "", *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
self,
key: str = "",
*,
prototype: BufferClassLike | None = None,
byte_range: ByteRequest | None = None,
) -> bytes:
"""
Retrieve raw bytes from the store synchronously.
Expand All @@ -271,8 +296,12 @@ def _get_bytes_sync(
----------
key : str, optional
The key identifying the data to retrieve. Defaults to an empty string.
prototype : BufferPrototype
The buffer prototype to use for reading the data.
prototype : BufferClassLike | None, optional
The prototype of the output buffer.
Can be either a Buffer class or an instance of `BufferPrototype`, in which case its
`buffer` attribute will be used.
If `None`, the default buffer class for this store will be retrieved via the
``_get_default_buffer_class`` method.
byte_range : ByteRequest, optional
If specified, only retrieve a portion of the stored data.
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
Expand Down Expand Up @@ -301,15 +330,19 @@ def _get_bytes_sync(
--------
>>> store = MemoryStore()
>>> await store.set("data", Buffer.from_bytes(b"hello world"))
>>> data = store.get_bytes_sync("data", prototype=default_buffer_prototype())
>>> data = store._get_bytes_sync("data")
>>> print(data)
b'hello world'
"""

return sync(self._get_bytes(key, prototype=prototype, byte_range=byte_range))

async def _get_json(
self, key: str, *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
self,
key: str,
*,
prototype: BufferClassLike | None = None,
byte_range: ByteRequest | None = None,
) -> Any:
"""
Retrieve and parse JSON data from the store asynchronously.
Expand All @@ -321,8 +354,12 @@ async def _get_json(
----------
key : str
The key identifying the JSON data to retrieve.
prototype : BufferPrototype
The buffer prototype to use for reading the data.
prototype : BufferClassLike | None, optional
The prototype of the output buffer.
Can be either a Buffer class or an instance of `BufferPrototype`, in which case its
`buffer` attribute will be used.
If `None`, the default buffer class for this store will be retrieved via the
``_get_default_buffer_class`` method.
byte_range : ByteRequest, optional
If specified, only retrieve a portion of the stored data.
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
Expand Down Expand Up @@ -351,15 +388,19 @@ async def _get_json(
>>> store = await MemoryStore.open()
>>> metadata = {"zarr_format": 3, "node_type": "array"}
>>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode()))
>>> data = await store.get_json("zarr.json", prototype=default_buffer_prototype())
>>> data = await store._get_json("zarr.json")
>>> print(data)
{'zarr_format': 3, 'node_type': 'array'}
"""

return json.loads(await self._get_bytes(key, prototype=prototype, byte_range=byte_range))

def _get_json_sync(
self, key: str = "", *, prototype: BufferPrototype, byte_range: ByteRequest | None = None
self,
key: str = "",
*,
prototype: BufferClassLike | None = None,
byte_range: ByteRequest | None = None,
) -> Any:
"""
Retrieve and parse JSON data from the store synchronously.
Expand All @@ -372,8 +413,12 @@ def _get_json_sync(
----------
key : str, optional
The key identifying the JSON data to retrieve. Defaults to an empty string.
prototype : BufferPrototype
The buffer prototype to use for reading the data.
prototype : BufferClassLike | None, optional
The prototype of the output buffer.
Can be either a Buffer class or an instance of `BufferPrototype`, in which case its
`buffer` attribute will be used.
If `None`, the default buffer class for this store will be retrieved via the
``_get_default_buffer_class`` method.
byte_range : ByteRequest, optional
If specified, only retrieve a portion of the stored data.
Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``.
Expand Down Expand Up @@ -407,7 +452,7 @@ def _get_json_sync(
>>> store = MemoryStore()
>>> metadata = {"zarr_format": 3, "node_type": "array"}
>>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode()))
>>> data = store.get_json_sync("zarr.json", prototype=default_buffer_prototype())
>>> data = store._get_json_sync("zarr.json")
>>> print(data)
{'zarr_format': 3, 'node_type': 'array'}
"""
Expand All @@ -417,15 +462,19 @@ def _get_json_sync(
@abstractmethod
async def get_partial_values(
self,
prototype: BufferPrototype,
prototype: BufferClassLike | None,
key_ranges: Iterable[tuple[str, ByteRequest | None]],
) -> list[Buffer | None]:
"""Retrieve possibly partial values from given key_ranges.
Parameters
----------
prototype : BufferPrototype
The prototype of the output buffer. Stores may support a default buffer prototype.
prototype : BufferClassLike | None
The prototype of the output buffer.
Can be either a Buffer class or an instance of `BufferPrototype`, in which case its
`buffer` attribute will be used.
If `None`, the default buffer class for this store will be retrieved via the
``_get_default_buffer_class`` method.
key_ranges : Iterable[tuple[str, ByteRequest | None]]
Ordered set of key, range pairs, a key may occur multiple times with different ranges
Expand Down Expand Up @@ -597,7 +646,7 @@ def close(self) -> None:
self._is_open = False

async def _get_many(
self, requests: Iterable[tuple[str, BufferPrototype, ByteRequest | None]]
self, requests: Iterable[tuple[str, BufferClassLike | None, ByteRequest | None]]
) -> AsyncGenerator[tuple[str, Buffer | None], None]:
"""
Retrieve a collection of objects from storage. In general this method does not guarantee
Expand Down Expand Up @@ -628,10 +677,8 @@ async def getsize(self, key: str) -> int:
# Note to implementers: this default implementation is very inefficient since
# it requires reading the entire object. Many systems will have ways to get the
# size of an object without reading it.
# avoid circular import
from zarr.core.buffer.core import default_buffer_prototype

value = await self.get(key, prototype=default_buffer_prototype())
value = await self.get(key)
if value is None:
raise FileNotFoundError(key)
return len(value)
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/core/_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def byte_info(size: int) -> str:


@dataclasses.dataclass(kw_only=True, frozen=True, slots=True)
class ArrayInfo:
class ArrayInfo: # type: ignore[misc]
"""
Visual summary for an Array.

Expand Down
17 changes: 17 additions & 0 deletions src/zarr/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@

from typing_extensions import ReadOnly

from zarr.core.buffer import Buffer, BufferPrototype
from zarr.core.config import config as zarr_config
from zarr.errors import ZarrRuntimeWarning

if TYPE_CHECKING:
from collections.abc import Awaitable, Callable, Iterator

from zarr.abc.store import BufferClassLike


ZARR_JSON = "zarr.json"
ZARRAY_JSON = ".zarray"
Expand Down Expand Up @@ -246,3 +249,17 @@ def _warn_order_kwarg() -> None:
def _default_zarr_format() -> ZarrFormat:
    """
    Return the default Zarr format.

    The value is read from the ``default_zarr_format`` configuration key
    (falling back to ``3`` when the key is absent) and coerced to ``int``.
    """
    configured = zarr_config.get("default_zarr_format", 3)
    return cast("ZarrFormat", int(configured))


def parse_bufferclasslike(obj: BufferClassLike | None) -> type[Buffer]:
    """
    Resolve an optional ``BufferClassLike`` to a concrete ``Buffer`` class.

    Parameters
    ----------
    obj : BufferClassLike | None
        A ``Buffer`` class, a ``BufferPrototype`` instance, or ``None``.

    Returns
    -------
    type[Buffer]
        ``obj.buffer`` when ``obj`` is a ``BufferPrototype``; the result of
        ``get_buffer_class()`` when ``obj`` is ``None``; otherwise ``obj``
        itself, unchanged.
    """
    # Imported locally to avoid a circular import. Temporary fix until the
    # modules are re-organized appropriately.
    from zarr.registry import get_buffer_class

    if isinstance(obj, BufferPrototype):
        return obj.buffer
    return get_buffer_class() if obj is None else obj
4 changes: 2 additions & 2 deletions src/zarr/core/dtype/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def check_dtype_spec_v2(data: object) -> TypeGuard[DTypeSpec_V2]:
DTypeSpec_V3 = str | NamedConfig[str, Mapping[str, object]]


def check_dtype_spec_v3(data: object) -> TypeGuard[DTypeSpec_V3]:
def check_dtype_spec_v3(data: object) -> TypeGuard[DTypeSpec_V3]: # type: ignore[valid-type]
"""
Type guard for narrowing the type of a python object to an instance of
DTypeSpec_V3, i.e either a string or a dict with a "name" field that's a string and a
Expand All @@ -141,7 +141,7 @@ def check_dtype_spec_v3(data: object) -> TypeGuard[DTypeSpec_V3]:
return False


def unpack_dtype_json(data: DTypeSpec_V2 | DTypeSpec_V3) -> DTypeJSON:
def unpack_dtype_json(data: DTypeSpec_V2 | DTypeSpec_V3) -> DTypeJSON: # type: ignore[valid-type]
"""
Return the array metadata form of the dtype JSON representation. For the Zarr V3 form of dtype
metadata, this is a no-op. For the Zarr V2 form of dtype metadata, this unpacks the dtype name.
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/core/dtype/npy/structured.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructuredJSON
elif zarr_format == 3:
v3_unstable_dtype_warning(self)
fields = [
[f_name, f_dtype.to_json(zarr_format=zarr_format)] # type: ignore[list-item]
[f_name, f_dtype.to_json(zarr_format=zarr_format)]
for f_name, f_dtype in self.fields
]
base_dict = {
Expand Down
6 changes: 3 additions & 3 deletions src/zarr/core/dtype/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@


@dataclass(frozen=True, kw_only=True, slots=True)
class ZDType(ABC, Generic[TDType_co, TScalar_co]):
class ZDType(ABC, Generic[TDType_co, TScalar_co]): # type: ignore[misc]
"""
Abstract base class for wrapping native array data types, e.g. numpy dtypes

Expand Down Expand Up @@ -169,10 +169,10 @@ def from_json(cls: type[Self], data: DTypeJSON, *, zarr_format: ZarrFormat) -> S
def to_json(self, zarr_format: Literal[2]) -> DTypeSpec_V2: ...

@overload
def to_json(self, zarr_format: Literal[3]) -> DTypeSpec_V3: ...
def to_json(self, zarr_format: Literal[3]) -> DTypeSpec_V3: ... # type: ignore[valid-type]

@abstractmethod
def to_json(self, zarr_format: ZarrFormat) -> DTypeSpec_V2 | DTypeSpec_V3:
def to_json(self, zarr_format: ZarrFormat) -> DTypeSpec_V2 | DTypeSpec_V3: # type: ignore[valid-type]
"""
Serialize this ZDType to JSON.

Expand Down
Loading
Loading