From 959b8691749e92a48ef1a05f1df4ef4c7bbc616a Mon Sep 17 00:00:00 2001 From: Jens Hedegaard Nielsen Date: Thu, 1 Feb 2024 14:14:22 +0000 Subject: [PATCH 1/2] Merge pull request #5711 from jenshnielsen/load_by_guid_lazy Make loading from netcdf lazy using load_by_guid etc. --- docs/changes/newsfragments/5711.improved | 1 + src/qcodes/dataset/data_set_in_memory.py | 37 ++++------- tests/dataset/test_dataset_export.py | 81 ++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 24 deletions(-) create mode 100644 docs/changes/newsfragments/5711.improved diff --git a/docs/changes/newsfragments/5711.improved b/docs/changes/newsfragments/5711.improved new file mode 100644 index 00000000000..d2e3d3aecda --- /dev/null +++ b/docs/changes/newsfragments/5711.improved @@ -0,0 +1 @@ +As an extension to the feature added in #5627 datasets are also no longer converted into QCoDeS format when loaded from netcdf using ``load_by_guid``, ``load_by_id``, ``load_by_run_spec``, ``load_by_counter`` diff --git a/src/qcodes/dataset/data_set_in_memory.py b/src/qcodes/dataset/data_set_in_memory.py index 802fd9b432e..880293c8da2 100644 --- a/src/qcodes/dataset/data_set_in_memory.py +++ b/src/qcodes/dataset/data_set_in_memory.py @@ -332,34 +332,23 @@ def _load_from_db(cls, conn: ConnectionPlus, guid: str) -> DataSetInMem: export_info=export_info, snapshot=run_attributes["snapshot"], ) - xr_path = export_info.export_paths.get("nc") + xr_path_temp = export_info.export_paths.get("nc") + xr_path = Path(xr_path_temp) if xr_path_temp is not None else None cls._set_cache_from_netcdf(ds, xr_path) return ds @classmethod - def _set_cache_from_netcdf(cls, ds: DataSetInMem, xr_path: str | None) -> bool: - import cf_xarray as cfxr - import xarray as xr + def _set_cache_from_netcdf(cls, ds: DataSetInMem, xr_path: Path | None) -> bool: success = True - if xr_path is not None: - try: - loaded_data = xr.load_dataset(xr_path, engine="h5netcdf") - loaded_data = 
cfxr.coding.decode_compress_to_multi_index(loaded_data) - ds._cache = DataSetCacheInMem(ds) - ds._cache._data = cls._from_xarray_dataset_to_qcodes_raw_data( - loaded_data - ) - except ( - FileNotFoundError, - OSError, - ): # older versions of h5py may throw a OSError here - success = False - warnings.warn( - "Could not load raw data for dataset with guid :" - f"{ds.guid} from location {xr_path}" - ) + if xr_path is not None and xr_path.is_file(): + ds._cache = DataSetCacheDeferred(ds, xr_path) + elif xr_path is not None and not xr_path.is_file(): + success = False + warnings.warn( + f"Could not load raw data for dataset with guid : {ds.guid} from location {xr_path}" + ) else: warnings.warn(f"No raw data stored for dataset with guid : {ds.guid}") success = False @@ -375,12 +364,12 @@ def set_netcdf_location(self, path: str | Path) -> None: be able to use this method to update the metadata in the database to refer to the new location. """ - if isinstance(path, Path): - path = str(path) + if isinstance(path, str): + path = Path(path) data_loaded = self._set_cache_from_netcdf(self, path) if data_loaded: export_info = self.export_info - export_info.export_paths["nc"] = path + export_info.export_paths["nc"] = str(path) self._set_export_info(export_info) else: raise FileNotFoundError(f"Could not load a netcdf file from {path}") diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py index a177023f754..780192dc57e 100644 --- a/tests/dataset/test_dataset_export.py +++ b/tests/dataset/test_dataset_export.py @@ -20,6 +20,7 @@ DataSetType, Measurement, get_data_export_path, + load_by_guid, load_by_id, load_from_netcdf, new_data_set, @@ -131,6 +132,21 @@ def _make_mock_dataset_grid(experiment) -> DataSet: return dataset +@pytest.fixture(name="mock_dataset_in_mem_grid") +def _make_mock_dataset_in_mem_grid(experiment) -> DataSetProtocol: + meas = Measurement(exp=experiment, name="in_mem_ds") + meas.register_custom_parameter("x", 
paramtype="numeric") + meas.register_custom_parameter("y", paramtype="numeric") + meas.register_custom_parameter("z", paramtype="numeric", setpoints=("x", "y")) + + with meas.run(dataset_class=DataSetType.DataSetInMem) as datasaver: + for x in range(10): + for y in range(20, 25): + results: list[tuple[str, int]] = [("x", x), ("y", y), ("z", x + y)] + datasaver.add_result(*results) + return datasaver.dataset + + @pytest.fixture(name="mock_dataset_grid_with_shapes") def _make_mock_dataset_grid_with_shapes(experiment) -> DataSet: dataset = new_data_set("dataset") @@ -1408,3 +1424,68 @@ def test_export_lazy_load( getattr(ds, function_name)() assert ds.cache._data != {} + + +@given( + function_name=hst.sampled_from( + [ + "to_xarray_dataarray_dict", + "to_pandas_dataframe", + "to_pandas_dataframe_dict", + "get_parameter_data", + ] + ) +) +@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,), deadline=None) +def test_export_lazy_load_in_mem_dataset( + tmp_path_factory: TempPathFactory, + mock_dataset_in_mem_grid: DataSet, + function_name: str, +) -> None: + tmp_path = tmp_path_factory.mktemp("export_netcdf") + path = str(tmp_path) + mock_dataset_in_mem_grid.export( + export_type="netcdf", path=tmp_path, prefix="qcodes_" + ) + + xr_ds = mock_dataset_in_mem_grid.to_xarray_dataset() + assert xr_ds["z"].dims == ("x", "y") + + expected_path = f"qcodes_{mock_dataset_in_mem_grid.captured_run_id}_{mock_dataset_in_mem_grid.guid}.nc" + assert os.listdir(path) == [expected_path] + file_path = os.path.join(path, expected_path) + ds = load_from_netcdf(file_path) + + # loading the dataset should not load the actual data into cache + assert ds.cache._data == {} + # loading directly into xarray should not round + # trip to qcodes format and therefore not fill the cache + xr_ds_reimported = ds.to_xarray_dataset() + assert ds.cache._data == {} + + assert xr_ds_reimported["z"].dims == ("x", "y") + assert xr_ds.identical(xr_ds_reimported) + + # but loading with any of these 
functions + # will currently fill the cache + getattr(ds, function_name)() + + assert ds.cache._data != {} + + dataset_loaded_by_guid = load_by_guid(mock_dataset_in_mem_grid.guid) + + # loading the dataset should not load the actual data into cache + assert dataset_loaded_by_guid.cache._data == {} + # loading directly into xarray should not round + # trip to qcodes format and therefore not fill the cache + xr_ds_reimported = dataset_loaded_by_guid.to_xarray_dataset() + assert dataset_loaded_by_guid.cache._data == {} + + assert xr_ds_reimported["z"].dims == ("x", "y") + assert xr_ds.identical(xr_ds_reimported) + + # but loading with any of these functions + # will currently fill the cache + getattr(dataset_loaded_by_guid, function_name)() + + assert dataset_loaded_by_guid.cache._data != {} From 49b3f61b974afe5ced2c6eb993b8560694be3030 Mon Sep 17 00:00:00 2001 From: "Jens H. Nielsen" Date: Thu, 1 Feb 2024 15:59:05 +0100 Subject: [PATCH 2/2] add changelog for 0.44.1 --- docs/changes/0.44.1.rst | 7 +++++++ docs/changes/index.rst | 1 + docs/changes/newsfragments/5711.improved | 1 - 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 docs/changes/0.44.1.rst delete mode 100644 docs/changes/newsfragments/5711.improved diff --git a/docs/changes/0.44.1.rst b/docs/changes/0.44.1.rst new file mode 100644 index 00000000000..bcaeed5323d --- /dev/null +++ b/docs/changes/0.44.1.rst @@ -0,0 +1,7 @@ +QCoDeS 0.44.1 (2024-02-01) +========================== + +Improved: +--------- + +- As an extension to the feature added in #5627 datasets are also no longer converted into QCoDeS format when loaded from netcdf using ``load_by_guid``, ``load_by_id``, ``load_by_run_spec``, ``load_by_counter`` (:pr:`5711`) diff --git a/docs/changes/index.rst b/docs/changes/index.rst index da7c3b1f242..9cbf575d4bf 100644 --- a/docs/changes/index.rst +++ b/docs/changes/index.rst @@ -3,6 +3,7 @@ Changelogs .. 
toctree:: Unreleased + 0.44.1 <0.44.1> 0.44.0 <0.44.0> 0.43.0 <0.43.0> 0.42.1 <0.42.1> diff --git a/docs/changes/newsfragments/5711.improved b/docs/changes/newsfragments/5711.improved deleted file mode 100644 index d2e3d3aecda..00000000000 --- a/docs/changes/newsfragments/5711.improved +++ /dev/null @@ -1 +0,0 @@ -As an extension to the feature added in #5627 datasets are also no longer converted into QCoDeS format when loaded from netcdf using ``load_by_guid``, ``load_by_id``, ``load_by_run_spec``, ``load_by_counter``