From d5a6ef1d215c1040501a9320a0895a8de2c6e7b7 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Fri, 12 Dec 2025 11:17:48 -0500 Subject: [PATCH 1/3] add drop_existing kwarg to set_xindex --- doc/internals/how-to-create-custom-index.rst | 9 +++----- doc/whats-new.rst | 4 ++++ xarray/core/dataarray.py | 8 ++++++- xarray/core/dataset.py | 13 +++++++++--- xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 22 ++++++++++++++++++++ 6 files changed, 52 insertions(+), 10 deletions(-) diff --git a/doc/internals/how-to-create-custom-index.rst b/doc/internals/how-to-create-custom-index.rst index 351694fc62d..4621e052539 100644 --- a/doc/internals/how-to-create-custom-index.rst +++ b/doc/internals/how-to-create-custom-index.rst @@ -224,12 +224,9 @@ custom index to a Dataset or DataArray, e.g., using the ``RasterIndex`` above: dims=("y", "x"), ) - # Xarray create default indexes for the 'x' and 'y' coordinates - # we first need to explicitly drop it - da = da.drop_indexes(["x", "y"]) - - # Build a RasterIndex from the 'x' and 'y' coordinates - da_raster = da.set_xindex(["x", "y"], RasterIndex) + # Xarray creates default indexes for the 'x' and 'y' coordinates + # Use drop_existing=True to replace them with a custom index + da_raster = da.set_xindex(["x", "y"], RasterIndex, drop_existing=True) # RasterIndex now takes care of label-based selection selected = da_raster.sel(x=10, y=slice(20, 50)) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 89e9fbef56f..4bdafa3eb82 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,10 @@ v2025.12.1 (unreleased) New Features ~~~~~~~~~~~~ +- Added ``drop_existing`` parameter to :py:meth:`Dataset.set_xindex` and + :py:meth:`DataArray.set_xindex` to allow replacing existing indexes without + needing to call :py:meth:`drop_indexes` first (:pull:`XXXX`). + By `Ian Hunt-Isaak `_. 
Breaking Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 71d427d3db9..d1fc3796421 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2866,6 +2866,7 @@ def set_xindex( self, coord_names: str | Sequence[Hashable], index_cls: type[Index] | None = None, + drop_existing: bool = False, **options, ) -> Self: """Set a new, Xarray-compatible index from one or more existing @@ -2879,6 +2880,9 @@ def set_xindex( index_cls : subclass of :class:`~xarray.indexes.Index` The type of index to create. By default, try setting a pandas (multi-)index from the supplied coordinates. + drop_existing : bool + Whether to drop indexes on any existing coord_names if one + is present. **options Options passed to the index constructor. @@ -2888,7 +2892,9 @@ def set_xindex( Another dataarray, with this dataarray's data and with a new index. """ - ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options) + ds = self._to_temp_dataset().set_xindex( + coord_names, index_cls, drop_existing, **options + ) return self._from_temp_dataset(ds) def reorder_levels( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bce048048da..9f6a32e8352 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4956,6 +4956,7 @@ def set_xindex( self, coord_names: str | Sequence[Hashable], index_cls: type[Index] | None = None, + drop_existing: bool = False, **options, ) -> Self: """Set a new, Xarray-compatible index from one or more existing @@ -4970,6 +4971,9 @@ def set_xindex( The type of index to create. By default, try setting a ``PandasIndex`` if ``len(coord_names) == 1``, otherwise a ``PandasMultiIndex``. + drop_existing : bool + Whether to drop indexes on any existing coord_names if one + is present **options Options passed to the index constructor. 
@@ -5010,9 +5014,12 @@ def set_xindex( indexed_coords = set(coord_names) & set(self._indexes) if indexed_coords: - raise ValueError( - f"those coordinates already have an index: {indexed_coords}" - ) + if drop_existing: + self.drop_indexes(indexed_coords) + else: + raise ValueError( + f"those coordinates already have an index: {indexed_coords}" + ) coord_vars = {name: self._variables[name] for name in coord_names} diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 7ae36421e14..5ea327f5ece 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2418,6 +2418,12 @@ def from_variables(cls, variables, options): assert "foo" in indexed.xindexes assert indexed.xindexes["foo"].opt == 1 # type: ignore[attr-defined] + def test_set_xindex_drop_existing(self) -> None: + # Basic test that drop_existing parameter is passed through to Dataset + da = DataArray([1, 2, 3, 4], coords={"x": ("x", [0, 1, 2, 3])}, dims="x") + result = da.set_xindex("x", PandasIndex, drop_existing=True) + assert "x" in result.xindexes + def test_dataset_getitem(self) -> None: dv = self.ds["foo"] assert_identical(dv, self.dv) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6dce32aeb5c..e87cb45d631 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4000,6 +4000,28 @@ class NotAnIndex: ... 
with pytest.raises(ValueError, match="those coordinates already have an index"): ds2.set_xindex("x", PandasIndex) + def test_set_xindex_drop_existing(self) -> None: + # Test that drop_existing=True allows replacing an existing index + # (the default drop_existing=False raising ValueError is tested in test_set_xindex) + ds = Dataset(coords={"x": ("x", [0, 1, 2, 3])}) + + # With drop_existing=True, it should succeed + result = ds.set_xindex("x", PandasIndex, drop_existing=True) + assert "x" in result.xindexes + assert isinstance(result.xindexes["x"], PandasIndex) + + # Test that drop_existing=True replaces with a custom index + class CustomIndex(PandasIndex): + pass + + result_custom = ds.set_xindex("x", CustomIndex, drop_existing=True) + assert "x" in result_custom.xindexes + assert isinstance(result_custom.xindexes["x"], CustomIndex) + + # Verify the result is equivalent to drop_indexes + set_xindex + expected = ds.drop_indexes("x").set_xindex("x", CustomIndex) + assert_identical(result_custom, expected) + def test_set_xindex_options(self) -> None: ds = Dataset(coords={"foo": ("x", ["a", "a", "b", "b"])}) From 52922374255ad97bc68be8735779d4a67c597bbd Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Fri, 12 Dec 2025 11:19:50 -0500 Subject: [PATCH 2/3] keep important part of comment --- doc/internals/how-to-create-custom-index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/internals/how-to-create-custom-index.rst b/doc/internals/how-to-create-custom-index.rst index 4621e052539..dbc24b6fb54 100644 --- a/doc/internals/how-to-create-custom-index.rst +++ b/doc/internals/how-to-create-custom-index.rst @@ -224,6 +224,7 @@ custom index to a Dataset or DataArray, e.g., using the ``RasterIndex`` above: dims=("y", "x"), ) + # Build a RasterIndex from the 'x' and 'y' coordinates # Xarray creates default indexes for the 'x' and 'y' coordinates # Use drop_existing=True to replace them with a custom index da_raster = da.set_xindex(["x", "y"], RasterIndex, 
drop_existing=True) From 3ffb62731a8477cfb12d17fc8195cac16dd505b1 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Fri, 12 Dec 2025 11:20:27 -0500 Subject: [PATCH 3/3] PR number --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4bdafa3eb82..b0a42e81e06 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,7 +16,7 @@ New Features - Added ``drop_existing`` parameter to :py:meth:`Dataset.set_xindex` and :py:meth:`DataArray.set_xindex` to allow replacing existing indexes without - needing to call :py:meth:`drop_indexes` first (:pull:`XXXX`). + needing to call :py:meth:`drop_indexes` first (:pull:`11008`). By `Ian Hunt-Isaak <https://github.com/ianhi>`_. Breaking Changes