From c3fe63be49c899e975185642094037beb1b7625f Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 27 Feb 2021 23:45:40 +0100 Subject: [PATCH 1/5] expose combine_attrs to open_mfdataset --- xarray/backends/api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 4fa34b39925..34c82a1b4b0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -741,6 +741,7 @@ def open_mfdataset( parallel=False, join="outer", attrs_file=None, + combine_attrs="drop", **kwargs, ): """Open multiple files as a single dataset. @@ -960,7 +961,7 @@ def open_mfdataset( coords=coords, ids=ids, join=join, - combine_attrs="drop", + combine_attrs=combine_attrs, ) elif combine == "by_coords": # Redo ordering from coordinates, ignoring how they were ordered @@ -971,7 +972,7 @@ def open_mfdataset( data_vars=data_vars, coords=coords, join=join, - combine_attrs="drop", + combine_attrs=combine_attrs, ) else: raise ValueError( From b6cc61115458df1976ad540f2c889c8ea1ee697a Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 27 Feb 2021 23:56:57 +0100 Subject: [PATCH 2/5] add tests for passing combine_attrs to open_mfdataset --- xarray/tests/test_backends.py | 54 +++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d15736e608d..ffd177d792a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2996,6 +2996,60 @@ def test_open_mfdataset_does_same_as_concat(self, combine, opt, join): ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join) assert_identical(ds, ds_expect) + @pytest.mark.parametrize( + ["combine_attrs", "attrs", "expected", "expect_error"], + ( + pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"), + pytest.param( + "override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override" + ), + pytest.param( + "no_conflicts", [{"a": 1}, {"a": 2}], None, True, 
id="no_conflicts" + ), + pytest.param( + "identical", + [{"a": 1, "b": 2}, {"a": 1, "c": 3}], + None, + True, + id="identical", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": -1, "c": 3}], + {"a": 1, "c": 3}, + False, + id="drop_conflicts", + ), + ), + ) + def test_open_mfdataset_dataset_combine_attrs( + self, combine_attrs, attrs, expected, expect_error + ): + with self.setup_files_and_datasets() as (files, [ds1, ds2]): + # Give the files an inconsistent attribute + for i, f in enumerate(files): + ds = open_dataset(f).load() + ds.attrs = attrs[i] + ds.close() + ds.to_netcdf(f) + + if expect_error: + with pytest.raises(xr.MergeError): + xr.open_mfdataset( + files, + combine="by_coords", + concat_dim="t", + combine_attrs=combine_attrs, + ) + else: + with xr.open_mfdataset( + files, + combine="by_coords", + concat_dim="t", + combine_attrs=combine_attrs, + ) as ds: + assert ds.attrs == expected + def test_open_mfdataset_dataset_attr_by_coords(self): """ Case when an attribute differs across the multiple files From f43da88e677072fe6c57d1cd20750628d6a16c39 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 27 Feb 2021 23:57:52 +0100 Subject: [PATCH 3/5] don't override the main dataset attrs --- xarray/backends/api.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 34c82a1b4b0..e54a6c351ed 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -995,8 +995,6 @@ def multi_file_closer(): if isinstance(attrs_file, Path): attrs_file = str(attrs_file) combined.attrs = datasets[paths.index(attrs_file)].attrs - else: - combined.attrs = datasets[0].attrs return combined From e64b34bce390986ddfb97c2a169d0eab5500e905 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 31 Mar 2021 21:21:41 +0200 Subject: [PATCH 4/5] switch the default to "override" which seems closer to current behavior --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/xarray/backends/api.py b/xarray/backends/api.py index fb3140dce73..ab6ea3b1631 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -717,7 +717,7 @@ def open_mfdataset( parallel=False, join="outer", attrs_file=None, - combine_attrs="drop", + combine_attrs="override", **kwargs, ): """Open multiple files as a single dataset. From c03da0bc6e039876c60c22a7c47ab06adb826496 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 31 Mar 2021 21:24:50 +0200 Subject: [PATCH 5/5] update whats-new.rst [skip-ci] --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 77d6296acac..aa684ccefbe 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -61,6 +61,8 @@ New Features :py:class:`~core.groupby.DataArrayGroupBy`, inspired by pandas' :py:meth:`~pandas.core.groupby.GroupBy.get_group`. By `Deepak Cherian <https://github.com/dcherian>`_. +- Add a ``combine_attrs`` parameter to :py:func:`open_mfdataset` (:pull:`4971`). + By `Justus Magin <https://github.com/keewis>`_. Breaking changes ~~~~~~~~~~~~~~~~