Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make illegal path-like variable names when constructing a DataTree from a Dataset #9378

3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ Breaking changes
Bug fixes
~~~~~~~~~

- Forbid path-like variable names (names containing ``/``) when constructing a DataTree from a Dataset
(:issue:`9339`, :pull:`9378`)
By `Etienne Schalk <https://github.com/etienneschalk>`_.
- Fix bug with rechunking to a frequency when some periods contain no data (:issue:`9360`).
By `Deepak Cherian <https://github.com/dcherian>`_.
- Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`).
Expand Down
13 changes: 13 additions & 0 deletions xarray/core/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,18 @@ def check_alignment(
check_alignment(child_path, child_ds, base_ds, child.children)


def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None:
    """Reject variable names that contain the '/' character.

    Parameters
    ----------
    variables : Iterable[Hashable]
        Variable names to validate. Names that are not ``str`` instances are
        skipped by the check.

    Raises
    ------
    ValueError
        If at least one string name contains ``/``. The message lists every
        offending name, in the order the names were iterated.
    """
    offending_variable_names = [
        name for name in variables if isinstance(name, str) and "/" in name
    ]
    # An empty list is falsy, so this only fires when something was collected.
    if offending_variable_names:
        raise ValueError(
            "Given variables have names containing the '/' character: "
            f"{offending_variable_names}. "
            "Variables stored in DataTree objects cannot have names containing '/' characters, "
            "as this would make path-like access to variables ambiguous."
        )


class DatasetView(Dataset):
"""
An immutable Dataset-like view onto the data in a single DataTree node.
Expand Down Expand Up @@ -457,6 +469,7 @@ def __init__(
self.children = {name: child.copy() for name, child in children.items()}

def _set_node_data(self, dataset: Dataset):
_check_for_slashes_in_names(dataset.variables)
data_vars, coord_vars = _collect_data_and_coord_variables(dataset)
self._data_variables = data_vars
self._node_coord_variables = coord_vars
Expand Down
17 changes: 17 additions & 0 deletions xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,23 @@ def test_child_gets_named_on_attach(self):
mary = DataTree(children={"Sue": sue}) # noqa
assert mary.children["Sue"].name == "Sue"

def test_dataset_containing_slashes(self):
    """Constructing a DataTree from a Dataset with '/' in names must fail."""
    # Put a slash in both a coordinate name and a data variable name; the
    # expected error message below lists both of them.
    data_array: xr.DataArray = xr.DataArray(
        [[1, 2]],
        coords={"label": ["a"], "R30m/y": [30, 60]},
    )
    dataset: xr.Dataset = xr.Dataset({"group/subgroup/my_variable": data_array})

    # The message is matched literally, hence re.escape on the full text.
    expected_message = re.escape(
        "Given variables have names containing the '/' character: "
        "['R30m/y', 'group/subgroup/my_variable']. "
        "Variables stored in DataTree objects cannot have names containing '/' characters, "
        "as this would make path-like access to variables ambiguous."
    )
    with pytest.raises(ValueError, match=expected_message):
        DataTree(dataset)


class TestPaths:
def test_path_property(self):
Expand Down
Loading