Skip to content

Commit

Permalink
Add regression test
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed May 9, 2024
1 parent b66452a commit dc33846
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 2 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ nosetests.xml
dask-worker-space/

# asv environments
.asv
asv_bench/.asv
asv_bench/pkgs

# Translations
*.mo
Expand All @@ -68,7 +69,7 @@ dask-worker-space/

# xarray specific
doc/_build
generated/
doc/generated/
xarray/tests/data/*.grib.*.idx

# Sync tools
Expand Down
179 changes: 179 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import tempfile
import uuid
import warnings
from collections import Counter
from collections.abc import Generator, Iterator, Mapping
from contextlib import ExitStack
from io import BytesIO
Expand Down Expand Up @@ -2990,6 +2991,184 @@ def test_chunked_cftime_datetime(self) -> None:
assert original.chunks == actual.chunks


def attach_counter(f):
counter = Counter()

def wrapper(self, *args):
if f.__name__ in ["__contains__", "__getitem__"] and args[0] == "zarr.json":
# ignore this spam
pass
else:
print(f"Calling {f.__name__} with {args}")
counter.update(args[slice(1)]) if args else counter.update(("foo",))
return f(self, *args)

wrapper.counter = counter
return wrapper


class CountingStore(KVStoreV3):
def __init__(self):
super().__init__({})
self.instrumented_methods = [
"__iter__",
"__contains__",
"__setitem__",
"__getitem__",
"listdir",
"list_prefix",
]

@attach_counter
def __iter__(self):
return super().__iter__()

@attach_counter
def listdir(self, *args, **kwargs):
return super().listdir(*args, **kwargs)

@attach_counter
def list_prefix(self, *args, **kwargs):
return super().list_prefix(*args, **kwargs)

@attach_counter
def __contains__(self, key) -> bool:
return super().__contains__(key)

@attach_counter
def __getitem__(self, key):
return super().__getitem__(key)

@attach_counter
def __setitem__(self, *args, **kwargs):
return super().__setitem__(*args, **kwargs)

def summarize(self):
summary = {}
for method in self.instrumented_methods:
name = method.strip("__")
if counter := getattr(self, method).counter:
summary[name] = sum(counter.values())
else:
summary[name] = 0
return summary

def reset(self):
for method in self.instrumented_methods:
getattr(self, method).counter.clear()


@requires_zarr
@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3")
class TestInstrumentedZarrStore:
@contextlib.contextmanager
def create_zarr_target(self):
yield CountingStore()

def check_requests(self, expected, store):
summary = store.summarize()
for k in summary:
assert summary[k] <= expected[k], summary
store.reset()

def test_append(self) -> None:
original = Dataset({"foo": ("x", [1])}, coords={"x": [0]})
modified = Dataset({"foo": ("x", [2])}, coords={"x": [1]})
with self.create_zarr_target() as store:
original.to_zarr(store)
expected = {
"iter": 2,
"contains": 9,
"setitem": 9,
"getitem": 6,
"listdir": 2,
"list_prefix": 2,
}
self.check_requests(expected, store)

Check failure on line 3087 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9 min-all-deps

TestInstrumentedZarrStore.test_append AssertionError: {'contains': 12, 'getitem': 6, 'iter': 2, 'list_prefix': 2, ...} assert 12 <= 9

modified.to_zarr(store, mode="a", append_dim="x")
# v2024.03.0: {'iter': 6, 'contains': 2, 'setitem': 5, 'getitem': 10, 'listdir': 6, 'list_prefix': 0}
# 6057128b: {'iter': 5, 'contains': 2, 'setitem': 5, 'getitem': 10, "listdir": 5, "list_prefix": 0}
expected = {
"iter": 2,
"contains": 9,
"setitem": 8,
"getitem": 6,
"listdir": 2,
"list_prefix": 2,
}
self.check_requests(expected, store)

modified.to_zarr(store, mode="a-", append_dim="x")
expected = {
"iter": 2,
"contains": 2,
"setitem": 5,
"getitem": 6,
"listdir": 2,
"list_prefix": 2,
}
self.check_requests(expected, store)

with open_dataset(store, engine="zarr") as actual:
assert_identical(
actual, xr.concat([original, modified, modified], dim="x")
)

@requires_dask
def test_region_write(self) -> None:
ds = Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]}).chunk()
with self.create_zarr_target() as store:
ds.to_zarr(store, mode="w", compute=False)
expected = {
"iter": 2,
"contains": 7,
"setitem": 9,
"getitem": 6,
"listdir": 2,
"list_prefix": 4,
}
self.check_requests(expected, store)

Check failure on line 3131 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9

TestInstrumentedZarrStore.test_region_write AssertionError: {'contains': 9, 'getitem': 15, 'iter': 3, 'list_prefix': 4, ...} assert 3 <= 2

Check failure on line 3131 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.12

TestInstrumentedZarrStore.test_region_write AssertionError: {'contains': 9, 'getitem': 15, 'iter': 3, 'list_prefix': 4, ...} assert 3 <= 2

Check failure on line 3131 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.9

TestInstrumentedZarrStore.test_region_write AssertionError: {'contains': 9, 'getitem': 15, 'iter': 3, 'list_prefix': 4, ...} assert 3 <= 2

Check failure on line 3131 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12

TestInstrumentedZarrStore.test_region_write AssertionError: {'contains': 9, 'getitem': 15, 'iter': 3, 'list_prefix': 4, ...} assert 3 <= 2

Check failure on line 3131 in xarray/tests/test_backends.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9 min-all-deps

TestInstrumentedZarrStore.test_region_write AssertionError: {'contains': 22, 'getitem': 12, 'iter': 4, 'list_prefix': 6, ...} assert 4 <= 2

ds.to_zarr(store, region={"x": slice(None)})
# v2024.03.0: {'iter': 5, 'contains': 2, 'setitem': 1, 'getitem': 6, 'listdir': 5, 'list_prefix': 0}
# 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 5, 'listdir': 4, 'list_prefix': 0}
expected = {
"iter": 2,
"contains": 2,
"setitem": 1,
"getitem": 3,
"listdir": 2,
"list_prefix": 0,
}
self.check_requests(expected, store)

ds.to_zarr(store, region="auto")
# v2024.03.0: {'iter': 6, 'contains': 4, 'setitem': 1, 'getitem': 11, 'listdir': 6, 'list_prefix': 0}
# 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 7, 'listdir': 4, 'list_prefix': 0}
expected = {
"iter": 2,
"contains": 2,
"setitem": 1,
"getitem": 5,
"listdir": 2,
"list_prefix": 0,
}
self.check_requests(expected, store)

expected = {
"iter": 1,
"contains": 2,
"setitem": 0,
"getitem": 5,
"listdir": 1,
"list_prefix": 0,
}
with open_dataset(store, engine="zarr") as actual:
assert_identical(actual, ds)
self.check_requests(expected, store)


@requires_zarr
class TestZarrDictStore(ZarrBase):
@contextlib.contextmanager
Expand Down

0 comments on commit dc33846

Please sign in to comment.