From 13baca18e1b16a43e74d5201e121b27330a9dfd4 Mon Sep 17 00:00:00 2001
From: Andrey Akinshin
Date: Mon, 1 Apr 2024 17:42:19 +0200
Subject: [PATCH 1/8] Update reference to 'Weighted quantile estimators'
 (#8898)

---
 xarray/core/weighted.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
index ae9521309e0..8cb90ac1b2b 100644
--- a/xarray/core/weighted.py
+++ b/xarray/core/weighted.py
@@ -84,7 +84,7 @@
     method supported by this weighted version corresponds to the default
    "linear" option of ``numpy.quantile``. This is "Type 7" option, described
    in Hyndman and Fan (1996) [2]_. The implementation is largely inspired by a blog post
-    from A. Akinshin's [3]_.
+    from A. Akinshin (2023) [3]_.
 
     Parameters
     ----------
@@ -122,7 +122,8 @@
     .. [1] https://notstatschat.rbind.io/2020/08/04/weights-in-statistics/
     .. [2] Hyndman, R. J. & Fan, Y. (1996). Sample Quantiles in Statistical
        Packages. The American Statistician, 50(4), 361–365.
        https://doi.org/10.2307/2684934
-    .. [3] https://aakinshin.net/posts/weighted-quantiles
+    .. [3] Akinshin, A. (2023). "Weighted quantile estimators." arXiv:2304.07265 [stat.ME].
+       https://arxiv.org/abs/2304.07265
     """

From 6de99369502901374438583eba9f9818929885ed Mon Sep 17 00:00:00 2001
From: Tom Nicholas
Date: Mon, 1 Apr 2024 13:49:06 -0400
Subject: [PATCH 2/8] New empty whatsnew entry (#8899)

* new empty whatsnew entry

* Update doc/whats-new.rst

Co-authored-by: Deepak Cherian

---------

Co-authored-by: Deepak Cherian
---
 doc/whats-new.rst | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 738a833b413..e421eeb3a7f 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -15,6 +15,27 @@ What's New
 
     np.random.seed(123456)
 
+.. _whats-new.2024.04.0:
+
+v2024.04.0 (unreleased)
+-----------------------
+
+New Features
+~~~~~~~~~~~~
+
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+
+Bug fixes
+~~~~~~~~~
+
+
+Internal Changes
+~~~~~~~~~~~~~~~~
+
+
 .. _whats-new.2024.03.0:
 
 v2024.03.0 (Mar 29, 2024)

From 7e2415012226fe50ed8e1cdefbc073a78b592418 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 1 Apr 2024 11:02:55 -0700
Subject: [PATCH 3/8] Bump the actions group with 1 update (#8896)

Bumps the actions group with 1 update: [codecov/codecov-action](https://github.com/codecov/codecov-action).

Updates `codecov/codecov-action` from 4.1.0 to 4.1.1
- [Release notes](https://github.com/codecov/codecov-action/releases)
- [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/codecov/codecov-action/compare/v4.1.0...v4.1.1)

---
updated-dependencies:
- dependency-name: codecov/codecov-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: actions
...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci-additional.yaml | 8 ++++---- .github/workflows/ci.yaml | 2 +- .github/workflows/upstream-dev-ci.yaml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 9aa3b17746f..0fc30cc6b3a 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -128,7 +128,7 @@ jobs: python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/ - name: Upload mypy coverage to Codecov - uses: codecov/codecov-action@v4.1.0 + uses: codecov/codecov-action@v4.1.1 with: file: mypy_report/cobertura.xml flags: mypy @@ -182,7 +182,7 @@ jobs: python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/ - name: Upload mypy coverage to Codecov - uses: codecov/codecov-action@v4.1.0 + uses: codecov/codecov-action@v4.1.1 with: file: mypy_report/cobertura.xml flags: mypy39 @@ -243,7 +243,7 @@ jobs: python -m pyright xarray/ - name: Upload pyright coverage to Codecov - uses: codecov/codecov-action@v4.1.0 + uses: codecov/codecov-action@v4.1.1 with: file: pyright_report/cobertura.xml flags: pyright @@ -302,7 +302,7 @@ jobs: python -m pyright xarray/ - name: Upload pyright coverage to Codecov - uses: codecov/codecov-action@v4.1.0 + uses: codecov/codecov-action@v4.1.1 with: file: pyright_report/cobertura.xml flags: pyright39 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a37ff876e20..f6cc2bbb834 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -143,7 +143,7 @@ jobs: path: pytest.xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v4.1.0 + uses: codecov/codecov-action@v4.1.1 with: file: ./coverage.xml flags: unittests diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 872b2d865fb..f7a98206167 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -143,7 +143,7 @@ jobs: run: | python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov - uses: codecov/codecov-action@v4.1.0 + uses: codecov/codecov-action@v4.1.1 with: file: mypy_report/cobertura.xml flags: mypy From 97d3a3aaa071fa5341132331abe90ec39f914b52 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 11:57:42 -0700 Subject: [PATCH 4/8] [pre-commit.ci] pre-commit autoupdate (#8900) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 8 ++++---- xarray/coding/times.py | 10 +--------- xarray/coding/variables.py | 4 +--- xarray/core/arithmetic.py | 4 ++-- xarray/core/computation.py | 6 +----- xarray/core/formatting.py | 4 ++-- 6 files changed, 11 insertions(+), 25 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 74d77e2f2ca..970b2e5e8ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,13 +13,13 @@ repos: - id: mixed-line-ending - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: 'v0.2.0' + rev: 'v0.3.4' hooks: - id: ruff args: ["--fix", "--show-fixes"] # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.1.1 + rev: 24.3.0 hooks: - id: black-jupyter - repo: https://github.com/keewis/blackdoc @@ -27,10 +27,10 @@ repos: hooks: - id: blackdoc exclude: "generate_aggregations.py" - additional_dependencies: ["black==24.1.1"] + additional_dependencies: ["black==24.3.0"] - id: blackdoc-autoupdate-black - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.8.0 + rev: v1.9.0 hooks: - id: mypy # Copied from setup.cfg diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 92bce0abeaa..466e847e003 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -446,15 +446,7 @@ def format_cftime_datetime(date) -> str: """Converts a cftime.datetime object to a string with the format: YYYY-MM-DD HH:MM:SS.UUUUUU """ - return "{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}.{:06d}".format( - date.year, - date.month, - date.day, - date.hour, - date.minute, - date.second, - date.microsecond, - ) + return f"{date.year:04d}-{date.month:02d}-{date.day:02d} {date.hour:02d}:{date.minute:02d}:{date.second:02d}.{date.microsecond:06d}" def infer_timedelta_units(deltas) -> str: diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 52cf0fc3656..d31cb6e626a 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -81,9 +81,7 @@ def get_duck_array(self): return self.func(self.array.get_duck_array()) def __repr__(self) -> str: - return "{}({!r}, func={!r}, dtype={!r})".format( - type(self).__name__, self.array, self.func, self.dtype - ) + return f"{type(self).__name__}({self.array!r}, func={self.func!r}, dtype={self.dtype!r})" class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 452c7115b75..734d7b328de 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -62,10 +62,10 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method != "__call__": # TODO: support other methods, e.g., reduce and accumulate. raise NotImplementedError( - "{} method for ufunc {} is not implemented on xarray objects, " + f"{method} method for ufunc {ufunc} is not implemented on xarray objects, " "which currently only support the __call__ method. As an " "alternative, consider explicitly converting xarray objects " - "to NumPy arrays (e.g., with `.values`).".format(method, ufunc) + "to NumPy arrays (e.g., with `.values`)." 
            )

        if any(isinstance(o, SupportsArithmetic) for o in out):

diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index f29f6c4dd35..f09b04b7765 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -133,11 +133,7 @@ def __ne__(self, other):
         return not self == other
 
     def __repr__(self):
-        return "{}({!r}, {!r})".format(
-            type(self).__name__,
-            list(self.input_core_dims),
-            list(self.output_core_dims),
-        )
+        return f"{type(self).__name__}({list(self.input_core_dims)!r}, {list(self.output_core_dims)!r})"
 
     def __str__(self):
         lhs = ",".join("({})".format(",".join(dims)) for dims in self.input_core_dims)

diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index 260dabd9d31..3eed7d02a2e 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -289,8 +289,8 @@ def inline_sparse_repr(array):
     """Similar to sparse.COO.__repr__, but without the redundant shape/dtype."""
     sparse_array_type = array_type("sparse")
     assert isinstance(array, sparse_array_type), array
-    return "<{}: nnz={:d}, fill_value={!s}>".format(
-        type(array).__name__, array.nnz, array.fill_value
+    return (
+        f"<{type(array).__name__}: nnz={array.nnz:d}, fill_value={array.fill_value!s}>"
     )

From c5b90c54d55edd3021415427d9a26666b88da7b6 Mon Sep 17 00:00:00 2001
From: saschahofmann
Date: Wed, 3 Apr 2024 01:52:31 +0200
Subject: [PATCH 5/8] Update docstring for compute and persist (#8903)

* Update docstring for compute and persist
* Add compute reference to load docstring for dask array
* Use new wording for persist Co-authored-by: Justus Magin
* Update xarray/core/dataarray.py Co-authored-by: Justus Magin
* Apply suggestions from code review Co-authored-by: Justus Magin
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* Add "all" to persist docstring
* Apply suggestions from code review Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>

---------

Co-authored-by: Justus Magin
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
---
 xarray/core/dataarray.py | 18 ++++++++++++++++--
 xarray/core/dataset.py   | 11 +++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 80dcfe1302c..509962ff80d 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -1126,6 +1126,8 @@ def load(self, **kwargs) -> Self:
         """Manually trigger loading of this array's data from disk or a
         remote source into memory and return this array.
 
+        Unlike compute, the original array is modified and returned.
+
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
@@ -1148,8 +1150,9 @@ def compute(self, **kwargs) -> Self:
         """Manually trigger loading of this array's data from disk or a
-        remote source into memory and return a new array. The original is
-        left unaltered.
+        remote source into memory and return a new array.
+
+        Unlike load, the original is left unaltered.
 
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
@@ -1161,6 +1164,11 @@ def compute(self, **kwargs) -> Self:
         **kwargs : dict
             Additional keyword arguments passed on to ``dask.compute``.
 
+        Returns
+        -------
+        object : DataArray
+            New object with the data and all coordinates as in-memory arrays.
+
         See Also
         --------
         dask.compute
@@ -1174,12 +1182,18 @@ def persist(self, **kwargs) -> Self:
         This keeps them as dask arrays but encourages them to keep data in
         memory. This is particularly useful when on a distributed machine.
         When on a single machine consider using ``.compute()`` instead.
+        Like compute (but unlike load), the original array is left unaltered.
 
         Parameters
         ----------
         **kwargs : dict
             Additional keyword arguments passed on to ``dask.persist``.
 
+        Returns
+        -------
+        object : DataArray
+            New object with all dask-backed data and coordinates as persisted dask arrays.
+
         See Also
         --------
         dask.persist

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 2c0b3e89722..4c80a47209e 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -1005,6 +1005,11 @@ def compute(self, **kwargs) -> Self:
         **kwargs : dict
             Additional keyword arguments passed on to ``dask.compute``.
 
+        Returns
+        -------
+        object : Dataset
+            New object with lazy data variables and coordinates as in-memory arrays.
+
         See Also
         --------
         dask.compute
@@ -1037,12 +1042,18 @@ def persist(self, **kwargs) -> Self:
         operation keeps the data as dask arrays. This is particularly useful
         when using the dask.distributed scheduler and you want to load a large
        amount of data into distributed memory.
+        Like compute (but unlike load), the original dataset is left unaltered.
 
         Parameters
         ----------
         **kwargs : dict
             Additional keyword arguments passed on to ``dask.persist``.
 
+        Returns
+        -------
+        object : Dataset
+            New object with all dask-backed coordinates and data variables as persisted dask arrays.
+
         See Also
         --------
         dask.persist

From 40afd30405d0614f780a228777b780232bd8780c Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Wed, 3 Apr 2024 15:29:35 -0600
Subject: [PATCH 6/8] Stateful tests with Dataset (#8658)

* Stateful tests with Dataset
* Disable check_default_indexes when needed
* Add Zarr roundtrip
* Randomize dimension choice
* Fix a bug
* Add reset_index
* Add stack, unstack
* [revert] Disable Zarr till we control attrs strategy
* Try making unique names
* Share names strategy to ensure uniques?
* cleanup
* Try sharing strategies better
* Fix endianness
* Better swap_dims
* More improvements
* WIP
* Drop duplicates before unstacking
* Add reset_index
* Better duplicate assumption
* Move
* Fix reset_index
* Skip if hypothesis not installed
* Better precondition around reset_index
* Note
* Try a bundle
* Use unique_subset_of
* Use Bundles more
* Add index_variables strategy
* Small improvement
* fix
* Use st.shared
* Revert "Use st.shared" This reverts commit 50f60308fdbb5e04cc3e5263bb5283e25c82159d.
* fix unstacking
* cleanup
* WIP
* Remove bundles
* Fixes
* Add hypothesis cache to CI
* Prevent index variables with NaNs, infs
* [revert]
* Always save hypothesis cache
* Expand dtypes
* Add invariant check for #8646
* Add drop_dims
* Add create_index to stack
* Generalize a bit
* limit number of indexes to stack
* Fix endianness?
* uniquify drop_dims
* Avoid NaTs in index vars https://github.com/HypothesisWorks/hypothesis/issues/3943
* Guard swap_dims
* Revert "Add invariant check for #8646" This reverts commit 4a958dcf0fb7acab3d5123b3bd2d4138fb522f98.
* Add drop_indexes
* Add assign_coords
* Fix max_period for pandas timedelta
* Add xfailed test
* Add notes
* Skip timedelta indexes
* to_zarr
* small tweaks
* Remove NaT assume
* Revert "[revert]" This reverts commit 6a38e271eed61ecb943f60d3aed0875d0437de7b.
* Add hypothesis workflow
* Switch out
* fix
* Use st.builds
* cleanup
* Add initialize
* review feedback
---
 .github/workflows/ci-additional.yaml  |   3 +-
 .github/workflows/ci.yaml             |   8 +
 .github/workflows/hypothesis.yaml     | 100 ++++++++++
 properties/conftest.py                |  21 ++
 properties/test_index_manipulation.py | 273 ++++++++++++++++++++++++++
 pyproject.toml                        |   1 +
 xarray/testing/strategies.py          |  26 ++-
 7 files changed, 429 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/hypothesis.yaml
 create mode 100644 properties/test_index_manipulation.py

diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index 0fc30cc6b3a..2e615f3d429 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -35,14 +35,13 @@ jobs:
     runs-on: "ubuntu-latest"
     needs: detect-ci-trigger
     if: needs.detect-ci-trigger.outputs.triggered == 'false'
+
     defaults:
       run:
         shell: bash -l {0}
-
     env:
       CONDA_ENV_FILE: ci/requirements/environment.yml
       PYTHON_VERSION: "3.11"
-
     steps:
       - uses: actions/checkout@v4
         with:

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f6cc2bbb834..da7402a0708 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -127,6 +127,14 @@ jobs:
         run: |
           python -c "import xarray"
 
+      - name: Restore cached hypothesis directory
+        uses: actions/cache@v4
+        with:
+          path: .hypothesis/
+          key: cache-hypothesis
+          enableCrossOsArchive: true
+          save-always: true
+
       - name: Run tests
         run: python -m pytest -n 4 --timeout 180

diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml
new file mode 100644
index 00000000000..7e9459c6598
--- /dev/null
+++ b/.github/workflows/hypothesis.yaml
@@ -0,0 +1,100 @@
+name: Slow Hypothesis CI
+on:
+  push:
+    branches:
+      - "main"
+  pull_request:
+    branches:
+      - "main"
+    types: [opened, reopened, synchronize, labeled]
+  workflow_dispatch: # allows you to trigger manually
+
+jobs:
+  detect-ci-trigger:
+    name: detect ci trigger
+    runs-on: ubuntu-latest
+    if: |
+      github.repository == 'pydata/xarray'
+      && (github.event_name == 'push' || github.event_name == 'pull_request')
+    outputs:
+      triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 2
+      - uses: xarray-contrib/ci-trigger@v1
+        id: detect-trigger
+        with:
+          keyword: "[skip-ci]"
+
+  hypothesis:
+    name: Slow Hypothesis Tests
+    runs-on: "ubuntu-latest"
+    needs: detect-ci-trigger
+    if: |
+      always()
+      && (
+        (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+        || needs.detect-ci-trigger.outputs.triggered == 'true'
+        || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis')
+      )
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    env:
+      CONDA_ENV_FILE: ci/requirements/environment.yml
+      PYTHON_VERSION: "3.12"
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for all branches and tags.
+ + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ci/requirements/environment.yml + environment-name: xarray-tests + create-args: >- + python=${{env.PYTHON_VERSION}} + pytest-reportlog + cache-environment: true + cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Restore cached hypothesis directory + uses: actions/cache@v4 + with: + path: .hypothesis/ + key: cache-hypothesis + enableCrossOsArchive: true + save-always: true + - name: Run slow Hypothesis tests + if: success() + id: status + run: | + python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \ + --report-log output-${{ matrix.python-version }}-log.jsonl + - name: Generate and publish the report + if: | + failure() + && steps.status.outcome == 'failure' + && github.event_name == 'schedule' + && github.repository_owner == 'pydata' + uses: xarray-contrib/issue-from-pytest-log@v1 + with: + log-path: output-${{ matrix.python-version }}-log.jsonl + issue-title: "Nightly Hypothesis tests failed" + issue-label: "topic-hypothesis" diff --git a/properties/conftest.py b/properties/conftest.py index 0a66d92ebc6..30e638161a1 100644 --- a/properties/conftest.py +++ b/properties/conftest.py @@ -1,3 +1,24 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--run-slow-hypothesis", + action="store_true", + default=False, + help="run slow hypothesis tests", + ) + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--run-slow-hypothesis"): + return + skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run") + for item in items: + if "slow_hypothesis" in item.keywords: + item.add_marker(skip_slow_hyp) + + try: from hypothesis import settings except ImportError: diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py new file mode 100644 index 00000000000..77b7fcbcd99 --- /dev/null +++ b/properties/test_index_manipulation.py @@ -0,0 +1,273 @@ +import itertools + +import numpy as np +import pytest + +import xarray as xr +from xarray import Dataset +from xarray.testing import _assert_internal_invariants + +pytest.importorskip("hypothesis") +pytestmark = pytest.mark.slow_hypothesis + +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +from hypothesis import note, settings +from hypothesis.stateful import ( + RuleBasedStateMachine, + initialize, + invariant, + precondition, + rule, +) + +import xarray.testing.strategies as xrst + + +@st.composite +def unique(draw, strategy): + # https://stackoverflow.com/questions/73737073/create-hypothesis-strategy-that-returns-unique-values + seen = draw(st.shared(st.builds(set), key="key-for-unique-elems")) + return draw( + strategy.filter(lambda x: x not in seen).map(lambda x: seen.add(x) or x) + ) + + +# Share to ensure we get unique names on each draw, +# so we don't try to add two variables with the same name +# or stack to a dimension with a name that already exists in the Dataset. 
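+# A hypothetical example of how `unique` behaves: if the name "a" is already
+# in the shared `seen` set, the `.filter(...)` clause rejects it and hypothesis
+# draws a fresh name instead; `seen.add(x) or x` then records the accepted name
+# and returns it unchanged (set.add returns None, so the `or` falls through to x).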
+UNIQUE_NAME = unique(strategy=xrst.names()) +DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) +index_variables = st.builds( + xr.Variable, + data=npst.arrays( + dtype=xrst.pandas_index_dtypes(), + shape=npst.array_shapes(min_dims=1, max_dims=1), + elements=dict(allow_nan=False, allow_infinity=False, allow_subnormal=False), + unique=True, + ), + dims=DIM_NAME, + attrs=xrst.attrs(), +) + + +def add_dim_coord_and_data_var(ds, var): + (name,) = var.dims + # dim coord + ds[name] = var + # non-dim coord of same size; this allows renaming + ds[name + "_"] = var + + +class DatasetStateMachine(RuleBasedStateMachine): + # Can't use bundles because we'd need pre-conditions on consumes(bundle) + # indexed_dims = Bundle("indexed_dims") + # multi_indexed_dims = Bundle("multi_indexed_dims") + + def __init__(self): + super().__init__() + self.dataset = Dataset() + self.check_default_indexes = True + + # We track these separately as lists so we can guarantee order of iteration over them. + # Order of iteration over Dataset.dims is not guaranteed + self.indexed_dims = [] + self.multi_indexed_dims = [] + + @initialize(var=index_variables) + def init_ds(self, var): + """Initialize the Dataset so that at least one rule will always fire.""" + (name,) = var.dims + add_dim_coord_and_data_var(self.dataset, var) + + self.indexed_dims.append(name) + + # TODO: stacking with a timedelta64 index and unstacking converts it to object + @rule(var=index_variables) + def add_dim_coord(self, var): + (name,) = var.dims + note(f"adding dimension coordinate {name}") + add_dim_coord_and_data_var(self.dataset, var) + + self.indexed_dims.append(name) + + @rule(var=index_variables) + def assign_coords(self, var): + (name,) = var.dims + note(f"assign_coords: {name}") + self.dataset = self.dataset.assign_coords({name: var}) + + self.indexed_dims.append(name) + + @property + def has_indexed_dims(self) -> bool: + return bool(self.indexed_dims + self.multi_indexed_dims) + + @rule(data=st.data()) + @precondition(lambda self: self.has_indexed_dims) + def reset_index(self, data): + dim = data.draw(st.sampled_from(self.indexed_dims + self.multi_indexed_dims)) + self.check_default_indexes = False + note(f"> resetting {dim}") + self.dataset = self.dataset.reset_index(dim) + + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + + @rule(newname=UNIQUE_NAME, data=st.data(), create_index=st.booleans()) + @precondition(lambda self: bool(self.indexed_dims)) + def stack(self, newname, data, create_index): + oldnames = data.draw( + st.lists( + st.sampled_from(self.indexed_dims), + min_size=1, + max_size=3 if create_index else None, + unique=True, + ) + ) + note(f"> stacking {oldnames} as {newname}") + self.dataset = self.dataset.stack( + {newname: oldnames}, create_index=create_index + ) + + if create_index: + self.multi_indexed_dims += [newname] + + # if create_index is False, then we just drop these + for dim in oldnames: + del self.indexed_dims[self.indexed_dims.index(dim)] + + @rule(data=st.data()) + @precondition(lambda self: bool(self.multi_indexed_dims)) + def unstack(self, data): + # TODO: add None + dim = data.draw(st.sampled_from(self.multi_indexed_dims)) + note(f"> unstacking {dim}") + if dim is not None: + pd_index = self.dataset.xindexes[dim].index + self.dataset = self.dataset.unstack(dim) + + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + + if dim 
is not None: + self.indexed_dims.extend(pd_index.names) + else: + # TODO: fix this + pass + + @rule(newname=UNIQUE_NAME, data=st.data()) + @precondition(lambda self: bool(self.dataset.variables)) + def rename_vars(self, newname, data): + dim = data.draw(st.sampled_from(sorted(self.dataset.variables))) + # benbovy: "skip the default indexes invariant test when the name of an + # existing dimension coordinate is passed as input kwarg or dict key + # to .rename_vars()." + self.check_default_indexes = False + note(f"> renaming {dim} to {newname}") + self.dataset = self.dataset.rename_vars({dim: newname}) + + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + + @precondition(lambda self: bool(self.dataset.dims)) + @rule(data=st.data()) + def drop_dims(self, data): + dims = data.draw( + st.lists( + st.sampled_from(sorted(tuple(self.dataset.dims))), + min_size=1, + unique=True, + ) + ) + note(f"> drop_dims: {dims}") + self.dataset = self.dataset.drop_dims(dims) + + for dim in dims: + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + + @precondition(lambda self: bool(self.indexed_dims)) + @rule(data=st.data()) + def drop_indexes(self, data): + self.check_default_indexes = False + + dims = data.draw( + st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True) + ) + note(f"> drop_indexes: {dims}") + self.dataset = self.dataset.drop_indexes(dims) + + for dim in dims: + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + + @property + def swappable_dims(self): + ds = self.dataset + options = [] + for dim in self.indexed_dims: + choices = [ + name + for name, var in ds._variables.items() + if var.dims == (dim,) + # TODO: Avoid swapping a dimension to itself + and name != dim + ] + options.extend( + (a, b) for a, b in itertools.zip_longest((dim,), choices, fillvalue=dim) + ) + return options + + @rule(data=st.data()) + # TODO: swap_dims is basically all broken if a multiindex is present + # TODO: Avoid swapping from Index to a MultiIndex level + # TODO: Avoid swapping from MultiIndex to a level of the same MultiIndex + # TODO: Avoid swapping when a MultiIndex is present + @precondition(lambda self: not bool(self.multi_indexed_dims)) + @precondition(lambda self: bool(self.swappable_dims)) + def swap_dims(self, data): + ds = self.dataset + options = self.swappable_dims + dim, to = data.draw(st.sampled_from(options)) + note( + f"> swapping {dim} to {to}, found swappable dims: {options}, all_dims: {tuple(self.dataset.dims)}" + ) + self.dataset = ds.swap_dims({dim: to}) + + del self.indexed_dims[self.indexed_dims.index(dim)] + self.indexed_dims += [to] + + @invariant() + def assert_invariants(self): + # note(f"> ===\n\n {self.dataset!r} \n===\n\n") + _assert_internal_invariants(self.dataset, self.check_default_indexes) + + +DatasetStateMachine.TestCase.settings = settings(max_examples=300, deadline=None) +DatasetTest = DatasetStateMachine.TestCase + + +@pytest.mark.skip(reason="failure detected by hypothesis") +def test_unstack_object(): + import xarray as xr + + ds = xr.Dataset() + ds["0"] = np.array(["", "\x000"], dtype=object) + ds.stack({"1": ["0"]}).unstack() + + 
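+# Like test_unstack_object above, the skipped test that follows is a minimal
+# reproducer found by hypothesis; per the TODO on DatasetStateMachine, stacking
+# a timedelta64 index and then unstacking converts the index to object dtype.
+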
+@pytest.mark.skip(reason="failure detected by hypothesis") +def test_unstack_timedelta_index(): + import xarray as xr + + ds = xr.Dataset() + ds["0"] = np.array([0, 1, 2, 3], dtype="timedelta64[ns]") + ds.stack({"1": ["0"]}).unstack() diff --git a/pyproject.toml b/pyproject.toml index 7836cba40d4..751c9085ec8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -294,6 +294,7 @@ markers = [ "flaky: flaky tests", "network: tests requiring a network connection", "slow: slow tests", + "slow_hypothesis: slow hypothesis tests", ] minversion = "7" python_files = "test_*.py" diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index c5a7afdf54e..d2503dfd535 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -21,6 +21,7 @@ __all__ = [ "supported_dtypes", + "pandas_index_dtypes", "names", "dimension_names", "dimension_sizes", @@ -59,6 +60,26 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: | npst.unsigned_integer_dtypes() | npst.floating_dtypes() | npst.complex_number_dtypes() + # | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + # | npst.unicode_string_dtypes() + ) + + +def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: + """ + Dtypes supported by pandas indexes. + Restrict datetime64 and timedelta64 to ns frequency till Xarray relaxes that. + """ + return ( + npst.integer_dtypes(endianness="=", sizes=(32, 64)) + | npst.unsigned_integer_dtypes(endianness="=", sizes=(32, 64)) + | npst.floating_dtypes(endianness="=", sizes=(32, 64)) + # TODO: unset max_period + | npst.datetime64_dtypes(endianness="=", max_period="ns") + # TODO: set max_period="D" + | npst.timedelta64_dtypes(endianness="=", max_period="ns") + | npst.unicode_string_dtypes(endianness="=") ) @@ -87,6 +108,7 @@ def names() -> st.SearchStrategy[str]: def dimension_names( *, + name_strategy=names(), min_dims: int = 0, max_dims: int = 3, ) -> st.SearchStrategy[list[Hashable]]: @@ -97,6 +119,8 @@ def dimension_names( Parameters ---------- + name_strategy + Strategy for making names. Useful if we need to share this. min_dims Minimum number of dimensions in generated list. max_dims @@ -104,7 +128,7 @@ def dimension_names( """ return st.lists( - elements=names(), + elements=name_strategy, min_size=min_dims, max_size=max_dims, unique=True, From 40b3f2ca926f5c13045cc1a430fa226e96dc3728 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 3 Apr 2024 20:17:47 -0600 Subject: [PATCH 7/8] Trigger hypothesis stateful tests nightly (#8907) --- .github/workflows/hypothesis.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml index 7e9459c6598..62eb0800b2d 100644 --- a/.github/workflows/hypothesis.yaml +++ b/.github/workflows/hypothesis.yaml @@ -7,6 +7,8 @@ on: branches: - "main" types: [opened, reopened, synchronize, labeled] + schedule: + - cron: "0 0 * * *" # Daily β€œAt 00:00” UTC workflow_dispatch: # allows you to trigger manually jobs: From 56182f73c56bc619a18a9ee707ef6c19d54c58a2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 4 Apr 2024 10:46:53 -0600 Subject: [PATCH 8/8] Don't access data when creating DataArray from Variable. (#8754) * Don't access data when creating DataArray from Variable. 
Closes #8573

* Fix DataArray
* Fix test
* Add comment
---
 xarray/core/variable.py        |  9 +++++++--
 xarray/tests/test_dataarray.py | 14 ++++++++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index ec284e411fc..1f18f61441c 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -264,8 +264,13 @@ def as_compatible_data(
 
     from xarray.core.dataarray import DataArray
 
-    if isinstance(data, (Variable, DataArray)):
-        return data.data
+    # TODO: do this unwrapping in the Variable/NamedArray constructor instead.
+    if isinstance(data, Variable):
+        return cast("T_DuckArray", data._data)
+
+    # TODO: do this unwrapping in the DataArray constructor instead.
+    if isinstance(data, DataArray):
+        return cast("T_DuckArray", data._variable._data)
 
     if isinstance(data, NON_NUMPY_SUPPORTED_ARRAY_TYPES):
         data = _possibly_convert_datetime_or_timedelta_index(data)

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 04112a16ab3..f3413ea40a4 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -4947,7 +4947,7 @@ def test_idxmin(
         with pytest.raises(ValueError):
             xr.DataArray(5).idxmin()
 
-        coordarr0 = xr.DataArray(ar0.coords["x"], dims=["x"])
+        coordarr0 = xr.DataArray(ar0.coords["x"].data, dims=["x"])
         coordarr1 = coordarr0.copy()
 
         hasna = np.isnan(minindex)
@@ -5062,7 +5062,7 @@ def test_idxmax(
         with pytest.raises(ValueError):
             xr.DataArray(5).idxmax()
 
-        coordarr0 = xr.DataArray(ar0.coords["x"], dims=["x"])
+        coordarr0 = xr.DataArray(ar0.coords["x"].data, dims=["x"])
         coordarr1 = coordarr0.copy()
 
         hasna = np.isnan(maxindex)
@@ -7167,3 +7167,13 @@ def test_nD_coord_dataarray() -> None:
     _assert_internal_invariants(da4, check_default_indexes=True)
     assert "x" not in da4.xindexes
     assert "x" in da4.coords
+
+
+def test_lazy_data_variable_not_loaded():
+    # GH8753
+    array = InaccessibleArray(np.array([1, 2, 3]))
+    v = Variable(data=array, dims="x")
+    # No data needs to be accessed, so no error should be raised
+    da = xr.DataArray(v)
+    # Same when constructing a DataArray from an existing DataArray
+    xr.DataArray(da)
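
A minimal sketch of what this last patch buys in user code (the file and variable names below are hypothetical; the merged regression test instead relies on xarray's internal InaccessibleArray helper, which raises if its values are ever materialized):

    import xarray as xr

    # Opening without dask keeps each variable wrapped in a lazy,
    # backend-backed array rather than loading it into memory.
    ds = xr.open_dataset("observations.nc")
    v = ds["temperature"].variable

    # Before this change, construction went through the public `data`
    # property, which materializes lazily indexed backend arrays. It now
    # unwraps the private `_data` attribute instead, so neither of the
    # constructions below triggers any I/O:
    arr = xr.DataArray(v)
    arr2 = xr.DataArray(arr)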