From ab096b05821a039f7a5b416e83227462625687ff Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 17 Aug 2023 20:14:21 +0200 Subject: [PATCH] unpin `numpy` (#8061) * unpin `numpy` * dispatch to `np.empty_like` if there's no missing value * make sure there's no code path without `create_template` * declare `create_template` as callable * ignore the intentionally wrong typing * mark the ignore as intentional, even if it might not be used; this is important because the fix in `numpy` that now means we *don't* need it anymore has been around for less than 3 months (requires a sufficiently new version of `mypy`). * also directly check that `Variable.unstack` does not raise warnings * fix the unstack test --- ci/requirements/all-but-dask.yml | 2 +- ci/requirements/doc.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- xarray/core/variable.py | 13 +++++++------ xarray/tests/test_coding_strings.py | 2 +- xarray/tests/test_dataarray.py | 13 +++++++++++++ xarray/tests/test_groupby.py | 2 +- xarray/tests/test_variable.py | 9 +++++++++ 9 files changed, 35 insertions(+), 12 deletions(-) diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index 74c0b72bd0d..4645be08b83 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -23,7 +23,7 @@ dependencies: - netcdf4 - numba - numbagg - - numpy<1.24 + - numpy - packaging - pandas - pint<0.21 diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 18c79ee9edc..fe1fe91bb51 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -19,7 +19,7 @@ dependencies: - nbsphinx - netcdf4>=1.5 - numba - - numpy>=1.21,<1.24 + - numpy>=1.21 - packaging>=21.3 - pandas>=1.4 - pooch diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 6abee0b18c3..efa9ccb5a9a 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -22,7 +22,7 @@ 
dependencies: - netcdf4 - numba - numbagg - - numpy<1.24 + - numpy - packaging - pandas - pint<0.21 diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 7a478c35a58..dd73ef19658 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -25,7 +25,7 @@ dependencies: - numba - numbagg - numexpr - - numpy<1.24 + - numpy - packaging - pandas - pint<0.21 diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 79debe3a952..c89545c43ae 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -7,6 +7,7 @@ import warnings from collections.abc import Hashable, Iterable, Mapping, Sequence from datetime import timedelta +from functools import partial from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn import numpy as np @@ -1836,15 +1837,20 @@ def _unstack_once( new_shape = tuple(list(reordered.shape[: len(other_dims)]) + new_dim_sizes) new_dims = reordered.dims[: len(other_dims)] + new_dim_names + create_template: Callable if fill_value is dtypes.NA: is_missing_values = math.prod(new_shape) > math.prod(self.shape) if is_missing_values: dtype, fill_value = dtypes.maybe_promote(self.dtype) + + create_template = partial(np.full_like, fill_value=fill_value) else: dtype = self.dtype fill_value = dtypes.get_fill_value(dtype) + create_template = np.empty_like else: dtype = self.dtype + create_template = partial(np.full_like, fill_value=fill_value) if sparse: # unstacking a dense multitindexed array to a sparse array @@ -1867,12 +1873,7 @@ def _unstack_once( ) else: - data = np.full_like( - self.data, - fill_value=fill_value, - shape=new_shape, - dtype=dtype, - ) + data = create_template(self.data, shape=new_shape, dtype=dtype) # Indexer is a list of lists of locations. Each list is the locations # on the new dimension. 
This is robust to the data being sparse; in that diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 0c9f67e77ad..f1eca00f9a1 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -33,7 +33,7 @@ def test_vlen_dtype() -> None: assert strings.check_vlen_dtype(dtype) is bytes # check h5py variant ("vlen") - dtype = np.dtype("O", metadata={"vlen": str}) # type: ignore[call-overload] + dtype = np.dtype("O", metadata={"vlen": str}) # type: ignore[call-overload,unused-ignore] assert strings.check_vlen_dtype(dtype) is str assert strings.check_vlen_dtype(np.dtype(object)) is None diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 56b948dbf16..183c0ad7371 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2490,6 +2490,19 @@ def test_unstack_pandas_consistency(self) -> None: actual = DataArray(s, dims="z").unstack("z") assert_identical(expected, actual) + @pytest.mark.filterwarnings("error") + def test_unstack_roundtrip_integer_array(self) -> None: + arr = xr.DataArray( + np.arange(6).reshape(2, 3), + coords={"x": ["a", "b"], "y": [0, 1, 2]}, + dims=["x", "y"], + ) + + stacked = arr.stack(z=["x", "y"]) + roundtripped = stacked.unstack() + + assert_identical(arr, roundtripped) + def test_stack_nonunique_consistency(self, da) -> None: da = da.isel(time=0, drop=True) # 2D actual = da.stack(z=["a", "x"]) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index dcba25dbdcf..5d99eda1e88 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -728,7 +728,7 @@ def test_groupby_dataset_iter() -> None: def test_groupby_dataset_errors() -> None: data = create_test_data() with pytest.raises(TypeError, match=r"`group` must be"): - data.groupby(np.arange(10)) + data.groupby(np.arange(10)) # type: ignore with pytest.raises(ValueError, match=r"length does not match"): data.groupby(data["dim1"][:3]) 
with pytest.raises(TypeError, match=r"`group` must be"): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index e1a2d9c8922..f30cdcf3f73 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1707,6 +1707,15 @@ def test_stack_unstack_consistency(self): actual = v.stack(z=("x", "y")).unstack(z={"x": 2, "y": 2}) assert_identical(actual, v) + @pytest.mark.filterwarnings("error::RuntimeWarning") + def test_unstack_without_missing(self): + v = Variable(["z"], [0, 1, 2, 3]) + expected = Variable(["x", "y"], [[0, 1], [2, 3]]) + + actual = v.unstack(z={"x": 2, "y": 2}) + + assert_identical(actual, expected) + def test_broadcasting_math(self): x = np.random.randn(2, 3) v = Variable(["a", "b"], x)