From dc338467e1fc5aff1733525553af98720fbfb390 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 9 May 2024 11:55:57 -0600 Subject: [PATCH] Add regression test --- .gitignore | 5 +- xarray/tests/test_backends.py | 179 ++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 21c18c17ff7..21011f0eaa7 100644 --- a/.gitignore +++ b/.gitignore @@ -50,7 +50,8 @@ nosetests.xml dask-worker-space/ # asv environments -.asv +asv_bench/.asv +asv_bench/pkgs # Translations *.mo @@ -68,7 +69,7 @@ dask-worker-space/ # xarray specific doc/_build -generated/ +doc/generated/ xarray/tests/data/*.grib.*.idx # Sync tools diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0126b130e7c..cfa7653fffa 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -13,6 +13,7 @@ import tempfile import uuid import warnings +from collections import Counter from collections.abc import Generator, Iterator, Mapping from contextlib import ExitStack from io import BytesIO @@ -2990,6 +2991,184 @@ def test_chunked_cftime_datetime(self) -> None: assert original.chunks == actual.chunks +def attach_counter(f): + counter = Counter() + + def wrapper(self, *args): + if f.__name__ in ["__contains__", "__getitem__"] and args[0] == "zarr.json": + # ignore this spam + pass + else: + print(f"Calling {f.__name__} with {args}") + counter.update(args[slice(1)]) if args else counter.update(("foo",)) + return f(self, *args) + + wrapper.counter = counter + return wrapper + + +class CountingStore(KVStoreV3): + def __init__(self): + super().__init__({}) + self.instrumented_methods = [ + "__iter__", + "__contains__", + "__setitem__", + "__getitem__", + "listdir", + "list_prefix", + ] + + @attach_counter + def __iter__(self): + return super().__iter__() + + @attach_counter + def listdir(self, *args, **kwargs): + return super().listdir(*args, **kwargs) + + @attach_counter + def list_prefix(self, *args, **kwargs): + return super().list_prefix(*args, **kwargs) + + @attach_counter + def __contains__(self, key) -> bool: + return super().__contains__(key) + + @attach_counter + def __getitem__(self, key): + return super().__getitem__(key) + + @attach_counter + def __setitem__(self, *args, **kwargs): + return super().__setitem__(*args, **kwargs) + + def summarize(self): + summary = {} + for method in self.instrumented_methods: + name = method.strip("__") + if counter := getattr(self, method).counter: + summary[name] = sum(counter.values()) + else: + summary[name] = 0 + return summary + + def reset(self): + for method in self.instrumented_methods: + getattr(self, method).counter.clear() + + +@requires_zarr +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestInstrumentedZarrStore: + @contextlib.contextmanager + def create_zarr_target(self): + yield CountingStore() + + def check_requests(self, expected, store): + summary = store.summarize() + for k in summary: + assert summary[k] <= expected[k], summary + store.reset() + + def test_append(self) -> None: + original = Dataset({"foo": ("x", [1])}, coords={"x": [0]}) + modified = Dataset({"foo": ("x", [2])}, coords={"x": [1]}) + with self.create_zarr_target() as store: + original.to_zarr(store) + expected = { + "iter": 2, + "contains": 9, + "setitem": 9, + "getitem": 6, + "listdir": 2, + "list_prefix": 2, + } + self.check_requests(expected, store) + + modified.to_zarr(store, mode="a", append_dim="x") + # v2024.03.0: {'iter': 6, 'contains': 2, 'setitem': 5, 'getitem': 10, 'listdir': 6, 'list_prefix': 0} + # 6057128b: {'iter': 5, 'contains': 2, 'setitem': 5, 'getitem': 10, "listdir": 5, "list_prefix": 0} + expected = { + "iter": 2, + "contains": 9, + "setitem": 8, + "getitem": 6, + "listdir": 2, + "list_prefix": 2, + } + self.check_requests(expected, store) + + modified.to_zarr(store, mode="a-", append_dim="x") + expected = { + "iter": 2, + "contains": 2, + "setitem": 5, + "getitem": 6, + "listdir": 2, + "list_prefix": 2, + } + self.check_requests(expected, store) + + with open_dataset(store, engine="zarr") as actual: + assert_identical( + actual, xr.concat([original, modified, modified], dim="x") + ) + + @requires_dask + def test_region_write(self) -> None: + ds = Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]}).chunk() + with self.create_zarr_target() as store: + ds.to_zarr(store, mode="w", compute=False) + expected = { + "iter": 2, + "contains": 7, + "setitem": 9, + "getitem": 6, + "listdir": 2, + "list_prefix": 4, + } + self.check_requests(expected, store) + + ds.to_zarr(store, region={"x": slice(None)}) + # v2024.03.0: {'iter': 5, 'contains': 2, 'setitem': 1, 'getitem': 6, 'listdir': 5, 'list_prefix': 0} + # 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 5, 'listdir': 4, 'list_prefix': 0} + expected = { + "iter": 2, + "contains": 2, + "setitem": 1, + "getitem": 3, + "listdir": 2, + "list_prefix": 0, + } + self.check_requests(expected, store) + + ds.to_zarr(store, region="auto") + # v2024.03.0: {'iter': 6, 'contains': 4, 'setitem': 1, 'getitem': 11, 'listdir': 6, 'list_prefix': 0} + # 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 7, 'listdir': 4, 'list_prefix': 0} + expected = { + "iter": 2, + "contains": 2, + "setitem": 1, + "getitem": 5, + "listdir": 2, + "list_prefix": 0, + } + self.check_requests(expected, store) + + expected = { + "iter": 1, + "contains": 2, + "setitem": 0, + "getitem": 5, + "listdir": 1, + "list_prefix": 0, + } + with open_dataset(store, engine="zarr") as actual: + assert_identical(actual, ds) + self.check_requests(expected, store) + + @requires_zarr class TestZarrDictStore(ZarrBase): @contextlib.contextmanager