Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ Bug Fixes
- Fix error handling issue in ``decode_cf_variables`` when decoding fails - the exception is now re-raised
correctly, with a note added about the variable name that caused the error (:issue:`10873`, :pull:`10886`).
By `Jonas L. Bertelsen <https://github.com/jonaslb>`_
- When assigning an indexed coordinate to a data variable or coordinate, coerce it from
``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`)
By `Julia Signell <https://github.com/jsignell>`_

Performance
~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,7 +1265,7 @@ def create_coords_with_default_indexes(
variables.update(idx_vars)
all_variables.update(idx_vars)
else:
variables[name] = variable
variables[name] = variable.to_base_variable()

new_coords = Coordinates._construct_direct(coords=variables, indexes=indexes)

Expand Down
13 changes: 11 additions & 2 deletions xarray/structure/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@
emit_user_level_warning,
equivalent,
)
from xarray.core.variable import Variable, as_variable, calculate_dimensions
from xarray.core.variable import (
IndexVariable,
Variable,
as_variable,
calculate_dimensions,
)
from xarray.structure.alignment import deep_align
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
Expand Down Expand Up @@ -1206,7 +1211,11 @@ def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeRe
if c not in value.dims and c in dataset.coords
]
if coord_names:
other[key] = value.drop_vars(coord_names)
value = value.drop_vars(coord_names)
if isinstance(value.variable, IndexVariable):
variable = value.variable.to_base_variable()
value = value._replace(variable=variable)
other[key] = value

return merge_core(
[dataset, other],
Expand Down
45 changes: 25 additions & 20 deletions xarray/testing/assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import warnings
from collections.abc import Hashable
from typing import Any

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -401,11 +402,25 @@ def _assert_indexes_invariants_checks(
)


def _assert_variable_invariants(var: Variable, name: Hashable = None):
def _assert_variable_invariants(
var: Variable | Any,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check now happens within this function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with https://github.com/pydata/xarray/pull/10909/files#r2528970158, it may be better to keep the IndexVariable vs Variable check at the level of Dataset and/or DataArray invariants.

Actually, the right place should probably be _assert_indexes_invariants_checks, where we could add the check that non-index variables are not instances of IndexVariable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the IndexVariable check, but I still think it is nicer to keep the Variable check within this function. Let me know if you'd still rather I move it out.

name: Hashable = None,
check_default_indexes: bool = True,
is_index: bool = False,
):
if name is None:
name_or_empty: tuple = ()
else:
name_or_empty = (name,)

assert isinstance(var, Variable), {name: type(var)}
if name:
if var.dims == name_or_empty:
if check_default_indexes:
assert isinstance(var, IndexVariable), {name: type(var)}
elif not is_index:
assert not isinstance(var, IndexVariable), {name: type(var)}

assert isinstance(var._dims, tuple), name_or_empty + (var._dims,)
assert len(var._dims) == len(var._data.shape), name_or_empty + (
var._dims,
Expand All @@ -418,25 +433,20 @@ def _assert_variable_invariants(var: Variable, name: Hashable = None):


def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool):
assert isinstance(da._variable, Variable), da._variable
_assert_variable_invariants(da._variable)
_assert_variable_invariants(da._variable, name=da.name, check_default_indexes=False)

assert isinstance(da._coords, dict), da._coords
assert all(isinstance(v, Variable) for v in da._coords.values()), da._coords

if check_default_indexes:
assert all(set(v.dims) <= set(da.dims) for v in da._coords.values()), (
da.dims,
{k: v.dims for k, v in da._coords.items()},
)
assert all(
isinstance(v, IndexVariable)
for (k, v) in da._coords.items()
if v.dims == (k,)
), {k: type(v) for k, v in da._coords.items()}

for k, v in da._coords.items():
_assert_variable_invariants(v, k)
_assert_variable_invariants(
v, k, check_default_indexes=check_default_indexes, is_index=k in da._indexes
)

if da._indexes is not None:
_assert_indexes_invariants_checks(
Expand All @@ -446,9 +456,11 @@ def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool):

def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool):
assert isinstance(ds._variables, dict), type(ds._variables)
assert all(isinstance(v, Variable) for v in ds._variables.values()), ds._variables

for k, v in ds._variables.items():
_assert_variable_invariants(v, k)
_assert_variable_invariants(
v, k, check_default_indexes=check_default_indexes, is_index=k in ds._indexes
)

assert isinstance(ds._coord_names, set), ds._coord_names
assert ds._coord_names <= ds._variables.keys(), (
Expand All @@ -466,13 +478,6 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool):
ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes
), (ds._dims, {k: v.sizes for k, v in ds._variables.items()})

if check_default_indexes:
assert all(
isinstance(v, IndexVariable)
for (k, v) in ds._variables.items()
if v.dims == (k,)
), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)}

if ds._indexes is not None:
_assert_indexes_invariants_checks(
ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes
Expand All @@ -492,7 +497,7 @@ def _assert_internal_invariants(
private APIs.
"""
if isinstance(xarray_obj, Variable):
_assert_variable_invariants(xarray_obj)
_assert_variable_invariants(xarray_obj, check_default_indexes=False)
elif isinstance(xarray_obj, DataArray):
_assert_dataarray_invariants(
xarray_obj, check_default_indexes=check_default_indexes
Expand Down
5 changes: 5 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,6 +1702,11 @@ def should_add_coord_to_array(self, name, var, dims):
assert_identical(actual, expected, check_default_indexes=False)
assert "x_bnds" not in actual.dims

def test_assign_coords_uses_base_variable_class(self) -> None:
a = DataArray([0, 1, 2], dims=["x"], coords={"x": [0, 1, 2]})
a = a.assign_coords(foo=a.x)
_assert_internal_invariants(a, check_default_indexes=True)

def test_coords_alignment(self) -> None:
lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])])
rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])])
Expand Down
11 changes: 9 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4311,9 +4311,11 @@ def test_to_stacked_array_preserves_dtype(self) -> None:
# coordinate created from variables names should be of string dtype
data = np.array(["a", "a", "a", "b"], dtype="<U1")
expected_stacked_variable = DataArray(name="variable", data=data, dims="z")

# coerce from `IndexVariable` to `Variable` before comparing
assert_identical(
stacked.coords["variable"].drop_vars(["z", "variable", "y"]),
expected_stacked_variable,
stacked["variable"].variable.to_base_variable(),
expected_stacked_variable.variable,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the only test change that was needed. Basically stacked["variable"].variable is an IndexVariable because it is represents a coord that is part of a MultiIndex. But you can't know that if you only have access to the DataArray with no coords. So I just switched it to be comparing variable and to coerce to the Variable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah multi-indexes are where IndexVariable really bites us

e.g. #8887

)

def test_to_stacked_array_transposed(self) -> None:
Expand Down Expand Up @@ -4779,6 +4781,11 @@ def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None:
with pytest.raises(ValueError, match=error_regex):
actual[var_list] = data

def test_setitem_uses_base_variable_class_even_for_index_variables(self) -> None:
ds = Dataset(coords={"x": [1, 2, 3]})
ds["y"] = ds["x"]
_assert_internal_invariants(ds, check_default_indexes=True)

def test_assign(self) -> None:
ds = Dataset()
actual = ds.assign(x=[0, 1, 2], y=2)
Expand Down
Loading