Skip to content

Commit

Permalink
Merge pull request #5589 from jenshnielsen/smart_multi_index
Browse files Browse the repository at this point in the history
Smarter selection of multi index export to xarray
  • Loading branch information
jenshnielsen authored Dec 14, 2023
2 parents 9cb94dc + 833a878 commit b0ef240
Show file tree
Hide file tree
Showing 7 changed files with 270 additions and 21 deletions.
5 changes: 5 additions & 0 deletions docs/changes/newsfragments/5589.improved
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The use of a multi index when exporting to xarray (and netcdf files) has been made smarter,
such that any dataset with a known shape (such as those measured by doNd etc.) will
never be automatically exported using a multi index, even in the case of incomplete datasets (e.g. due to an interrupted measurement).
Furthermore, `to_xarray_dataset` and `to_xarray_dataarray_dict` have gained a keyword argument `use_multi_index` to allow the user
to control the use of multi indexes.
39 changes: 32 additions & 7 deletions src/qcodes/dataset/data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pathlib import Path
from queue import Queue
from threading import Thread
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Literal

import numpy
from tqdm.auto import trange
Expand Down Expand Up @@ -971,6 +971,7 @@ def to_xarray_dataarray_dict(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> dict[str, xr.DataArray]:
"""
Returns the values stored in the :class:`.DataSet` for the specified parameters
Expand Down Expand Up @@ -1001,6 +1002,18 @@ def to_xarray_dataarray_dict(
if None
end: end value of selection range (by results count); ignored if
None
use_multi_index: Should the data be exported using a multi index
rather than regular cartesian indexes. With regular cartesian
coordinates, the xarray dimensions are calculated from the set of all
values along the setpoint axis of the QCoDeS dataset. Any position
in this grid not corresponding to a measured value will be filled
with a placeholder (typically NaN) potentially creating a sparse
dataset with significant storage overhead.
Multi index avoids this and is therefore better
suited for data that is known to not be on a grid.
If set to "auto" multi index will be used if projecting the data onto
a grid requires filling non measured values with NaN and the shapes
of the data have not been set in the run description.
Returns:
Dictionary from requested parameter names to :py:class:`xr.DataArray` s
Expand All @@ -1012,10 +1025,10 @@ def to_xarray_dataarray_dict(
dataarray_dict = ds.to_xarray_dataarray_dict()
"""
data = self.get_parameter_data(*params,
start=start,
end=end)
datadict = load_to_xarray_dataarray_dict(self, data)
data = self.get_parameter_data(*params, start=start, end=end)
datadict = load_to_xarray_dataarray_dict(
self, data, use_multi_index=use_multi_index
)

return datadict

Expand All @@ -1024,6 +1037,7 @@ def to_xarray_dataset(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> xr.Dataset:
"""
Returns the values stored in the :class:`.DataSet` for the specified parameters
Expand Down Expand Up @@ -1052,7 +1066,18 @@ def to_xarray_dataset(
if None
end: end value of selection range (by results count); ignored if
None
use_multi_index: Should the data be exported using a multi index
rather than regular cartesian indexes. With regular cartesian
coordinates, the xarray dimensions are calculated from the set of all
values along the setpoint axis of the QCoDeS dataset. Any position
in this grid not corresponding to a measured value will be filled
with a placeholder (typically NaN) potentially creating a sparse
dataset with significant storage overhead.
Multi index avoids this and is therefore better
suited for data that is known to not be on a grid.
If set to "auto" multi index will be used if projecting the data onto
a grid requires filling non measured values with NaN and the shapes
of the data have not been set in the run description.
Returns:
:py:class:`xr.Dataset` with the requested parameter(s) data as
:py:class:`xr.DataArray` s and coordinates formed by the dependencies.
Expand All @@ -1066,7 +1091,7 @@ def to_xarray_dataset(
start=start,
end=end)

return load_to_xarray_dataset(self, data)
return load_to_xarray_dataset(self, data, use_multi_index=use_multi_index)

def write_data_to_text_file(
self, path: str, single_file: bool = False, single_file_name: str | None = None
Expand Down
18 changes: 13 additions & 5 deletions src/qcodes/dataset/data_set_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
from collections.abc import Mapping
from typing import TYPE_CHECKING, Generic, TypeVar
from typing import TYPE_CHECKING, Generic, Literal, TypeVar

import numpy as np

Expand Down Expand Up @@ -177,7 +177,9 @@ def to_pandas(self) -> dict[str, pd.DataFrame]:
"""
return self.to_pandas_dataframe_dict()

def to_xarray_dataarray_dict(self) -> dict[str, xr.DataArray]:
def to_xarray_dataarray_dict(
self, *, use_multi_index: Literal["auto", "always", "never"] = "auto"
) -> dict[str, xr.DataArray]: # noqa: F821
"""
Returns the values stored in the :class:`.dataset.data_set.DataSet` as a dict of
:py:class:`xr.DataArray` s
Expand All @@ -190,9 +192,13 @@ def to_xarray_dataarray_dict(self) -> dict[str, xr.DataArray]:
"""
data = self.data()
return load_to_xarray_dataarray_dict(self._dataset, data)
return load_to_xarray_dataarray_dict(
self._dataset, data, use_multi_index=use_multi_index
)

def to_xarray_dataset(self) -> xr.Dataset:
def to_xarray_dataset(
self, *, use_multi_index: Literal["auto", "always", "never"] = "auto"
) -> xr.Dataset:
"""
Returns the values stored in the :class:`.dataset.data_set.DataSet` as a
:py:class:`xr.Dataset` object.
Expand All @@ -207,7 +213,9 @@ def to_xarray_dataset(self) -> xr.Dataset:
"""
data = self.data()
return load_to_xarray_dataset(self._dataset, data)
return load_to_xarray_dataset(
self._dataset, data, use_multi_index=use_multi_index
)


def load_new_data_from_db_and_append(
Expand Down
4 changes: 3 additions & 1 deletion src/qcodes/dataset/data_set_in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import warnings
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable
from typing import TYPE_CHECKING, Any, Callable, Literal

import numpy as np

Expand Down Expand Up @@ -838,6 +838,7 @@ def to_xarray_dataarray_dict(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> dict[str, xr.DataArray]:
self._warn_if_set(*params, start=start, end=end)
return self.cache.to_xarray_dataarray_dict()
Expand All @@ -847,6 +848,7 @@ def to_xarray_dataset(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> xr.Dataset:
self._warn_if_set(*params, start=start, end=end)
return self.cache.to_xarray_dataset()
Expand Down
12 changes: 11 additions & 1 deletion src/qcodes/dataset/data_set_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,15 @@
from collections.abc import Mapping, Sequence
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Protocol, Union, runtime_checkable
from typing import (
TYPE_CHECKING,
Any,
Callable,
Literal,
Protocol,
Union,
runtime_checkable,
)

import numpy as np
from typing_extensions import TypeAlias
Expand Down Expand Up @@ -257,6 +265,7 @@ def to_xarray_dataarray_dict(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> dict[str, xr.DataArray]:
...

Expand All @@ -265,6 +274,7 @@ def to_xarray_dataset(
*params: str | ParamSpec | ParameterBase,
start: int | None = None,
end: int | None = None,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> xr.Dataset:
...

Expand Down
42 changes: 35 additions & 7 deletions src/qcodes/dataset/exporters/export_to_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Hashable, Mapping
from math import prod
from pathlib import Path
from typing import TYPE_CHECKING, cast
from typing import TYPE_CHECKING, Literal, cast

import numpy as np
from tqdm.dask import TqdmCallback
Expand Down Expand Up @@ -61,11 +61,19 @@ def _calculate_index_shape(idx: pd.Index | pd.MultiIndex) -> dict[Hashable, int]


def _load_to_xarray_dataarray_dict_no_metadata(
dataset: DataSetProtocol, datadict: Mapping[str, Mapping[str, np.ndarray]]
dataset: DataSetProtocol,
datadict: Mapping[str, Mapping[str, np.ndarray]],
*,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> dict[str, xr.DataArray]:
import pandas as pd
import xarray as xr

if use_multi_index not in ("auto", "always", "never"):
raise ValueError(
f"Invalid value for use_multi_index. Expected one of 'auto', 'always', 'never' but got {use_multi_index}"
)

data_xrdarray_dict: dict[str, xr.DataArray] = {}

for name, subdict in datadict.items():
Expand Down Expand Up @@ -94,8 +102,16 @@ def _load_to_xarray_dataarray_dict_no_metadata(
index_prod = prod(calc_index.values())
# if the product of the len of individual index dims == len(total_index)
# we are on a grid

on_grid = index_prod == len(index)
if not on_grid:

export_with_multi_index = (
not on_grid
and dataset.description.shapes is None
and use_multi_index == "auto"
) or use_multi_index == "always"

if export_with_multi_index:
assert isinstance(df.index, pd.MultiIndex)

if hasattr(xr, "Coordinates"):
Expand All @@ -115,9 +131,14 @@ def _load_to_xarray_dataarray_dict_no_metadata(


def load_to_xarray_dataarray_dict(
dataset: DataSetProtocol, datadict: Mapping[str, Mapping[str, np.ndarray]]
dataset: DataSetProtocol,
datadict: Mapping[str, Mapping[str, np.ndarray]],
*,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> dict[str, xr.DataArray]:
dataarrays = _load_to_xarray_dataarray_dict_no_metadata(dataset, datadict)
dataarrays = _load_to_xarray_dataarray_dict_no_metadata(
dataset, datadict, use_multi_index=use_multi_index
)

for dataname, dataarray in dataarrays.items():
_add_param_spec_to_xarray_coords(dataset, dataarray)
Expand Down Expand Up @@ -157,7 +178,12 @@ def _add_metadata_to_xarray(
xrdataset.attrs[metadata_tag] = metadata


def load_to_xarray_dataset(dataset: DataSetProtocol, data: ParameterData) -> xr.Dataset:
def load_to_xarray_dataset(
dataset: DataSetProtocol,
data: ParameterData,
*,
use_multi_index: Literal["auto", "always", "never"] = "auto",
) -> xr.Dataset:
import xarray as xr

if not _same_setpoints(data):
Expand All @@ -168,7 +194,9 @@ def load_to_xarray_dataset(dataset: DataSetProtocol, data: ParameterData) -> xr.
"independent parameter to its own datarray."
)

data_xrdarray_dict = _load_to_xarray_dataarray_dict_no_metadata(dataset, data)
data_xrdarray_dict = _load_to_xarray_dataarray_dict_no_metadata(
dataset, data, use_multi_index=use_multi_index
)

# Casting Hashable for the key type until python/mypy#1114
# and python/typing#445 are resolved.
Expand Down
Loading

0 comments on commit b0ef240

Please sign in to comment.