Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade xarray.to_icechunk and related docs #633

Merged
merged 38 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
4a1628f
Support region="auto" in to_xarray
dcherian Jan 27, 2025
2354738
fix
dcherian Jan 27, 2025
5df083f
fix again
dcherian Jan 27, 2025
76d1362
lint
dcherian Jan 27, 2025
a1c9479
Support dataarray
dcherian Jan 27, 2025
4471081
Support region="auto"
dcherian Jan 27, 2025
75fa55a
fix
dcherian Jan 27, 2025
cc9791d
write_empty_chunks
dcherian Jan 27, 2025
39594d7
Revert "write_empty_chunks"
dcherian Jan 27, 2025
0628f73
Remove write_empty_chunks
dcherian Jan 27, 2025
d7042d1
lint
dcherian Jan 27, 2025
5f1a9e0
type
dcherian Jan 27, 2025
69495d9
Support safe_chunks
dcherian Jan 27, 2025
cd8ac54
skip one import
dcherian Jan 27, 2025
e7f5eeb
remove unnecessary **kwargs
dcherian Jan 27, 2025
bd25331
full compute support
dcherian Jan 27, 2025
4e9ad54
Minimal support for compute kwarg
dcherian Jan 27, 2025
8603947
Revert "Minimal support for compute kwarg"
dcherian Jan 28, 2025
8402014
Add distributed writes docs
dcherian Jan 28, 2025
6672ae9
sidestep compute=False in tests
dcherian Jan 28, 2025
1848b22
Revert "full compute support"
dcherian Jan 28, 2025
29cc317
Move to distributed.md
dcherian Jan 28, 2025
ec8c486
fix
dcherian Jan 28, 2025
91e4d50
Merge branch 'main' into region-auto
dcherian Jan 31, 2025
ee5250f
Merge branch 'main' into region-auto
dcherian Jan 31, 2025
0894777
cleanup after xarray release
dcherian Jan 31, 2025
e2c83de
update docs
dcherian Jan 31, 2025
ad4a77c
Edits
dcherian Jan 31, 2025
4145223
more edits
dcherian Jan 31, 2025
502fab0
few more edits
dcherian Jan 31, 2025
f3f7682
edit
dcherian Jan 31, 2025
d0a3d9e
fix
dcherian Jan 31, 2025
b4a90d6
to_icechunk takes session
dcherian Jan 31, 2025
ad96ba4
more edits
dcherian Jan 31, 2025
7c244f5
add faq
dcherian Jan 31, 2025
eb3aeb3
Update docs/docs/icechunk-python/parallel.md
dcherian Jan 31, 2025
e27c668
Merge branch 'main' into region-auto
dcherian Jan 31, 2025
baf8344
lint
dcherian Jan 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/docs/scripts/readthedocs.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ ${ config.versions.active.map(
</div>`;

document.querySelector(".md-header__topic").insertAdjacentHTML("beforeend", versioning);
});
});
48 changes: 36 additions & 12 deletions icechunk-python/python/icechunk/xarray.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env python3
import importlib
from collections.abc import Hashable, Mapping, MutableMapping
from dataclasses import dataclass, field
from typing import Any, Literal
from typing import Any, Literal, overload

import numpy as np
from packaging.version import Version
Expand All @@ -13,7 +12,7 @@
from icechunk.dask import stateful_store_reduce
from icechunk.distributed import extract_session, merge_sessions
from icechunk.vendor.xarray import _choose_default_mode
from xarray import Dataset
from xarray import DataArray, Dataset
from xarray.backends.common import ArrayWriter
from xarray.backends.zarr import ZarrStore

Expand Down Expand Up @@ -81,8 +80,6 @@ class XarrayDatasetWriter:
store: IcechunkStore = field(kw_only=True)

safe_chunks: bool = field(kw_only=True, default=True)
# TODO: uncomment when Zarr has support
# write_empty_chunks: bool = field(kw_only=True, default=True)

Copy link
Contributor Author

@dcherian dcherian Jan 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now deprecated, and users are supposed to set it in the Zarr config. So we don't need the kwarg

_initialized: bool = field(default=False, repr=False)

Expand Down Expand Up @@ -115,13 +112,12 @@ def _open_group(
append_dim=append_dim,
write_region=region,
safe_chunks=self.safe_chunks,
# TODO: uncomment when Zarr has support
# write_empty=self.write_empty_chunks,
synchronizer=None,
consolidated=False,
consolidate_on_close=False,
zarr_version=None,
)
self.dataset = self.xarray_store._validate_and_autodetect_region(self.dataset)

def write_metadata(self, encoding: Mapping[Any, Any] | None = None) -> None:
"""
Expand Down Expand Up @@ -194,13 +190,11 @@ def write_lazy(


def to_icechunk(
dataset: Dataset,
obj: DataArray | Dataset,
store: IcechunkStore,
*,
group: str | None = None,
mode: ZarrWriteModes | None = None,
# TODO: uncomment when Zarr has support
# write_empty_chunks: bool | None = None,
safe_chunks: bool = True,
append_dim: Hashable | None = None,
region: Region = None,
Expand All @@ -210,10 +204,12 @@ def to_icechunk(
**kwargs: Any,
) -> None:
"""
Write an Xarray Dataset to a group of an icechunk store.
Write an Xarray object to a group of an icechunk store.

Parameters
----------
obj: DataArray or Dataset
Xarray object to write
store : MutableMapping, str or path-like, optional
Store or path to directory in local or remote file system.
mode : {"w", "w-", "a", "a-", "r+", None}, optional
Expand Down Expand Up @@ -289,7 +285,9 @@ def to_icechunk(
``append_dim`` at the same time. To create empty arrays to fill
in with ``region``, use the `XarrayDatasetWriter` directly.
"""
writer = XarrayDatasetWriter(dataset, store=store)

as_dataset = make_dataset(obj)
writer = XarrayDatasetWriter(as_dataset, store=store)

writer._open_group(group=group, mode=mode, append_dim=append_dim, region=region)

Expand All @@ -299,3 +297,29 @@ def to_icechunk(
writer.write_eager()
# eagerly write dask arrays
writer.write_lazy(chunkmanager_store_kwargs=chunkmanager_store_kwargs)


@overload
def make_dataset(obj: DataArray) -> Dataset: ...
@overload
def make_dataset(obj: Dataset) -> Dataset: ...
def make_dataset(obj: DataArray | Dataset) -> Dataset:
    """
    Return ``obj`` as a Dataset so it can be handed to the dataset writer.

    Copied from ``DataArray.to_zarr``.

    Parameters
    ----------
    obj : DataArray or Dataset
        Xarray object to convert. A Dataset is returned unchanged (the same
        object, not a copy).

    Returns
    -------
    Dataset
        ``obj`` itself when it already is a Dataset; otherwise the result of
        ``obj.to_dataset(...)``. When the DataArray's name collides with one
        of its coordinates or dimensions, the original name is recorded in
        ``dataset.attrs[DATAARRAY_NAME]``.

    Raises
    ------
    AssertionError
        If ``obj`` is neither a Dataset nor a DataArray.
    """
    # Imported lazily, as in the original implementation.
    from xarray.backends.api import DATAARRAY_NAME, DATAARRAY_VARIABLE

    if isinstance(obj, Dataset):
        return obj

    # Narrows the type for static checkers and rejects unsupported inputs.
    assert isinstance(obj, DataArray)

    if obj.name is None:
        # If no name is set then use a generic xarray name
        dataset = obj.to_dataset(name=DATAARRAY_VARIABLE)
    elif obj.name in obj.coords or obj.name in obj.dims:
        # The name is the same as one of the coords names, which the netCDF data model
        # does not support, so rename it but keep track of the old name
        dataset = obj.to_dataset(name=DATAARRAY_VARIABLE)
        dataset.attrs[DATAARRAY_NAME] = obj.name
    else:
        # No problems with the name - so we're fine!
        dataset = obj.to_dataset()
    return dataset
16 changes: 16 additions & 0 deletions icechunk-python/tests/run_xarray_backends_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
local_filesystem_storage,
s3_storage,
)
from icechunk.xarray import to_icechunk
from tests.xarray_test_compat import ZarrRegionAutoTests
from xarray.tests.test_backends import (
ZarrBase,
default_zarr_format, # noqa: F401; needed otherwise not discovered
Expand Down Expand Up @@ -89,3 +91,17 @@ def create_zarr_target(self) -> Generator[IcechunkStore]:
)
session = repo.writable_session("main")
yield session.store


@pytest.mark.filterwarnings("ignore:Failed to open:RuntimeWarning")
class TestIcechunkRegionAuto(ZarrRegionAutoTests):
    """Run the ``ZarrRegionAutoTests`` suite against an in-memory Icechunk store."""

    @contextlib.contextmanager
    def create_zarr_target(self) -> Generator[IcechunkStore]:
        """Yield the store of a writable ``main`` session on a new in-memory repo.

        The test is skipped when zarr's configured default format is 2.
        """
        default_format = zarr.config.config["default_zarr_format"]
        if default_format == 2:
            pytest.skip("v2 not supported")

        # Keep the repository bound to a local for the lifetime of the context,
        # exactly as the session is.
        repository = Repository.create(in_memory_storage())
        writable = repository.writable_session("main")
        yield writable.store

    def save(self, target, ds, **kwargs):
        """Write ``ds`` to ``target`` via ``to_icechunk``, forwarding ``kwargs``."""
        to_icechunk(ds, target, **kwargs)
Loading
Loading