From bbe7d0536053a0939baaa808001329f3d13f487a Mon Sep 17 00:00:00 2001 From: eschalk Date: Sat, 3 Feb 2024 17:19:10 +0100 Subject: [PATCH 01/18] New alignment option: join='strict' --- doc/whats-new.rst | 4 ++ xarray/backends/api.py | 19 ++++--- xarray/core/alignment.py | 43 +++++++++++--- xarray/core/combine.py | 5 ++ xarray/core/concat.py | 3 + xarray/core/merge.py | 2 + xarray/core/options.py | 2 + xarray/core/types.py | 2 +- xarray/tests/test_concat.py | 101 +++++++++++++++++++++++++++++++++ xarray/tests/test_dataarray.py | 20 +++++++ 10 files changed, 184 insertions(+), 17 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1b0f2f18efb..6dfd824147f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,10 @@ New Features - Allow negative frequency strings (e.g. ``"-1YE"``). These strings are for example used in :py:func:`date_range`, and :py:func:`cftime_range` (:pull:`8651`). By `Mathias Hauser `_. +- Added a ``join="exact"`` mode for ``Aligner.align`` and related classes. + (:issue:`7132`, :issue:`8230`). + By `Etienne Schalk `_. + - Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. 
diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 670a0ec6d68..80c38a24c44 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -791,13 +791,15 @@ def open_dataarray( def open_mfdataset( paths: str | NestedSequence[str | os.PathLike], chunks: T_Chunks | None = None, - concat_dim: str - | DataArray - | Index - | Sequence[str] - | Sequence[DataArray] - | Sequence[Index] - | None = None, + concat_dim: ( + str + | DataArray + | Index + | Sequence[str] + | Sequence[DataArray] + | Sequence[Index] + | None + ) = None, compat: CompatOptions = "no_conflicts", preprocess: Callable[[Dataset], Dataset] | None = None, engine: T_Engine | None = None, @@ -912,7 +914,8 @@ def open_mfdataset( aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same - dimension must have the same size in all objects. + - "strict": similar to "exact", but less permissive. + The alignment fails if dimensions' names differ. attrs_file : str or path-like, optional Path of the file used to read global attributes from. 
By default global attributes are read from the first file provided, diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 28857c2d26e..b466f8938fb 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -5,7 +5,17 @@ from collections import defaultdict from collections.abc import Hashable, Iterable, Mapping from contextlib import suppress -from typing import TYPE_CHECKING, Any, Callable, Final, Generic, TypeVar, cast, overload +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Generic, + TypeVar, + cast, + get_args, + overload, +) import numpy as np import pandas as pd @@ -19,7 +29,7 @@ indexes_all_equal, safe_cast_to_index, ) -from xarray.core.types import T_Alignable +from xarray.core.types import JoinOptions, T_Alignable from xarray.core.utils import is_dict_like, is_full_slice from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions @@ -28,7 +38,6 @@ from xarray.core.dataset import Dataset from xarray.core.types import ( Alignable, - JoinOptions, T_DataArray, T_Dataset, T_DuckArray, @@ -145,7 +154,7 @@ def __init__( self.objects = tuple(objects) self.objects_matching_indexes = () - if join not in ["inner", "outer", "override", "exact", "left", "right"]: + if join not in get_args(JoinOptions): raise ValueError(f"invalid value for join: {join}") self.join = join @@ -264,13 +273,13 @@ def find_matching_indexes(self) -> None: self.all_indexes = all_indexes self.all_index_vars = all_index_vars - if self.join == "override": + if self.join in ("override", "strict"): for dim_sizes in all_indexes_dim_sizes.values(): for dim, sizes in dim_sizes.items(): if len(sizes) > 1: raise ValueError( - "cannot align objects with join='override' with matching indexes " - f"along dimension {dim!r} that don't have the same size" + f"cannot align objects with join={self.join!r} with matching indexes " + f"along dimension {dim!r} that don't have the same size ({sizes!r})" ) def 
find_matching_unindexed_dims(self) -> None: @@ -472,12 +481,27 @@ def assert_unindexed_dim_sizes_equal(self) -> None: ) else: add_err_msg = "" + # Same for indexed dims? if len(sizes) > 1: raise ValueError( f"cannot reindex or align along dimension {dim!r} " f"because of conflicting dimension sizes: {sizes!r}" + add_err_msg ) + def assert_equal_dimension_names(self) -> None: + # Strict mode only allows objects having the exact same dimensions' names. + if not self.join == "strict": + return + + unique_dims = set(tuple(o.sizes) for o in self.objects) + all_objects_have_same_dims = len(unique_dims) == 1 + if not all_objects_have_same_dims: + raise ValueError( + f"cannot align objects with join='strict' " + f"because given objects do not share the same dimension names ({[tuple(o.sizes) for o in self.objects]!r}); " + f"try using join='exact' if you only care about equal indexes" + ) + def override_indexes(self) -> None: objects = list(self.objects) @@ -568,6 +592,7 @@ def align(self) -> None: self.results = (obj.copy(deep=self.copy),) return + self.assert_equal_dimension_names() self.find_matching_indexes() self.find_matching_unindexed_dims() self.assert_no_index_conflict() @@ -576,7 +601,7 @@ def align(self) -> None: if self.join == "override": self.override_indexes() - elif self.join == "exact" and not self.copy: + elif not self.copy and (self.join in ("exact", "strict")): self.results = self.objects else: self.reindex_all() @@ -716,6 +741,8 @@ def align( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. + - "strict": similar to "exact", but less permissive. + The alignment fails if dimensions' names differ. copy : bool, default: True If ``copy=True``, data in the return values is always copied. 
If diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 10f194bf9f5..7f7301db9f8 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -448,6 +448,9 @@ def combine_nested( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. + - "strict": similar to "exact", but less permissive. + The alignment fails if dimensions' names differ. + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "drop" A callable or a string indicating how to combine attrs of the objects being @@ -737,6 +740,8 @@ def combine_by_coords( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. + - "strict": similar to "exact", but less permissive. + The alignment fails if dimensions' names differ. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "no_conflicts" diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 26cf36b3b07..2dd5b647fdd 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -148,6 +148,9 @@ def concat( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. + - "strict": similar to "exact", but less permissive. + The alignment fails if dimensions' names differ. 
+ combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "override" A callable or a string indicating how to combine attrs of the objects being diff --git a/xarray/core/merge.py b/xarray/core/merge.py index a8e54ad1231..aafd60d9ca4 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -790,6 +790,8 @@ def merge( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. + - "strict": similar to "exact", but less permissive. + The alignment fails if dimensions' names differ. fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps diff --git a/xarray/core/options.py b/xarray/core/options.py index d116c350991..453800ea1f7 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -166,6 +166,8 @@ class set_options: - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. + - "strict": similar to "exact", but less permissive. + The alignment fails if dimensions' names differ. cmap_divergent : str or matplotlib.colors.Colormap, default: "RdBu_r" Colormap to use for divergent data plots. 
If string, must be diff --git a/xarray/core/types.py b/xarray/core/types.py index 8c3164c52fa..44a24dc5501 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -206,7 +206,7 @@ def copy( Literal["drop", "identical", "no_conflicts", "drop_conflicts", "override"], Callable[..., Any], ] -JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override"] +JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override", "strict"] Interp1dOptions = Literal[ "linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial" diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 0cf4cc03a09..0519a91516e 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from copy import deepcopy from typing import TYPE_CHECKING, Any, Callable @@ -1261,3 +1262,103 @@ def test_concat_index_not_same_dim() -> None: match=r"Cannot concatenate along dimension 'x' indexes with dimensions.*", ): concat([ds1, ds2], dim="x") + + +def test_concat_join_coordinate_variables_non_asked_dims(): + ds1 = Dataset( + coords={ + "x_center": ("x_center", [1, 2, 3]), + "x_outer": ("x_outer", [0.5, 1.5, 2.5, 3.5]), + }, + ) + + ds2 = Dataset( + coords={ + "x_center": ("x_center", [4, 5, 6]), + "x_outer": ("x_outer", [4.5, 5.5, 6.5]), + }, + ) + + # Using join='outer' + expected_wrongly_concatenated_xds = Dataset( + coords={ + "x_center": ("x_center", [1, 2, 3, 4, 5, 6]), + "x_outer": ("x_outer", [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5]), + }, + ) + # Not using strict mode will allow the concatenation to surprisingly happen + # even if `x_outer` sizes do not match + actual_xds = concat( + [ds1, ds2], + dim="x_center", + data_vars="different", + coords="different", + join="outer", + ) + assert all(actual_xds == expected_wrongly_concatenated_xds) + + # Using join='strict' + # A check similar to the one made on non-indexed dimensions regarding their sizes. 
+ with pytest.raises( + ValueError, + match=re.escape( + r"cannot align objects with join='strict' with matching indexes " + r"along dimension 'x_outer' that don't have the same size ({3, 4})" + ), + ): + concat( + [ds1, ds2], + dim="x_center", + data_vars="different", + coords="different", + join="strict", + ) + + +def test_concat_join_non_coordinate_variables(): + ds1 = Dataset( + data_vars={ + "a": ("x_center", [1, 2, 3]), + "b": ("x_outer", [0.5, 1.5, 2.5, 3.5]), + }, + ) + + ds2 = Dataset( + data_vars={ + "a": ("x_center", [4, 5, 6]), + "b": ("x_outer", [4.5, 5.5, 6.5]), + }, + ) + + # Whether join='outer' or join='strict' modes are used, + # the concatenation fails because of the behavior disallowing alignment + # of non-indexed dimensions (not attached to a coordinate variable). + with pytest.raises( + ValueError, + match=( + r"cannot reindex or align along dimension 'x_outer' " + r"because of conflicting dimension sizes: {3, 4}" + ), + ): + concat( + [ds1, ds2], + dim="x_center", + data_vars="different", + coords="different", + join="strict", + ) + + with pytest.raises( + ValueError, + match=( + r"cannot reindex or align along dimension 'x_outer' " + r"because of conflicting dimension sizes: {3, 4}" + ), + ): + concat( + [ds1, ds2], + dim="x_center", + data_vars="different", + coords="different", + join="outer", + ) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ab1fc316f77..f8dce0a94ed 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3188,6 +3188,26 @@ def test_align_str_dtype(self) -> None: assert_identical(expected_b, actual_b) assert expected_b.x.dtype == actual_b.x.dtype + def test_align_exact_vs_strict(self) -> None: + xda_1 = xr.DataArray([1], dims="x1") + xda_2 = xr.DataArray([1], dims="x2") + + # join='exact' passes + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact") + assert aligned_1 == xda_1 + assert aligned_2 == xda_2 + + # join='strict' fails because of 
non-matching dimensions' names + with pytest.raises( + ValueError, + match=( + r"cannot align objects with join='strict' " + r"because given objects do not share the same dimension names " + r"([('x1',), ('x2',)])" + ), + ): + xr.align(xda_1, xda_2, join="strict") + def test_broadcast_arrays(self) -> None: x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x") y = DataArray([1, 2], coords=[("b", [3, 4])], name="y") From cddcaa16ad94c38ac2cd911d53372a6bb6e5654c Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 4 Feb 2024 20:24:02 +0100 Subject: [PATCH 02/18] Fix what's new newlines + retrigger CI --- doc/whats-new.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6dfd824147f..93dbc5a387b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,10 +29,8 @@ New Features - Added a ``join="exact"`` mode for ``Aligner.align`` and related classes. (:issue:`7132`, :issue:`8230`). By `Etienne Schalk `_. - - Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. - - Xarray now defers to flox's `heuristics `_ to set default `method` for groupby problems. This only applies to ``flox>=0.9``. By `Deepak Cherian `_. From 37a7b09a1c0e909d67637fe599435b3af36bc971 Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 4 Feb 2024 21:13:10 +0100 Subject: [PATCH 03/18] wrong join --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 93dbc5a387b..ceff3e93323 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,7 +26,7 @@ New Features - Allow negative frequency strings (e.g. ``"-1YE"``). These strings are for example used in :py:func:`date_range`, and :py:func:`cftime_range` (:pull:`8651`). By `Mathias Hauser `_. -- Added a ``join="exact"`` mode for ``Aligner.align`` and related classes. 
+- Added a ``join='strict'`` mode for ``Aligner.align`` and related classes. (:issue:`7132`, :issue:`8230`). By `Etienne Schalk `_. - Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to` From c6b1df50c7d88305f9bc04b1cfef7f5dbb5ca598 Mon Sep 17 00:00:00 2001 From: eschalk Date: Fri, 9 Feb 2024 21:23:37 +0100 Subject: [PATCH 04/18] Added test align 2d three arrays --- xarray/core/alignment.py | 1 - xarray/tests/test_dataarray.py | 32 +++++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 01987940d15..3968a03345a 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -481,7 +481,6 @@ def assert_unindexed_dim_sizes_equal(self) -> None: ) else: add_err_msg = "" - # Same for indexed dims? if len(sizes) > 1: raise ValueError( f"cannot reindex or align along dimension {dim!r} " diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6d10b2f3884..9bb857f6ed3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3194,7 +3194,7 @@ def test_align_str_dtype(self) -> None: assert_identical(expected_b, actual_b) assert expected_b.x.dtype == actual_b.x.dtype - def test_align_exact_vs_strict(self) -> None: + def test_align_exact_vs_strict_one_element(self) -> None: xda_1 = xr.DataArray([1], dims="x1") xda_2 = xr.DataArray([1], dims="x2") @@ -3206,14 +3206,36 @@ def test_align_exact_vs_strict(self) -> None: # join='strict' fails because of non-matching dimensions' names with pytest.raises( ValueError, - match=( - r"cannot align objects with join='strict' " - r"because given objects do not share the same dimension names " - r"([('x1',), ('x2',)])" + match=re.escape( + "cannot align objects with join='strict' " + "because given objects do not share the same dimension names " + "([('x1',), ('x2',)])" ), ): xr.align(xda_1, xda_2, join="strict") + def 
test_align_exact_vs_strict_2d(self) -> None: + xda_1 = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y1", "x1")) + xda_2 = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y2", "x2")) + xda_3 = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y3", "x3")) + + # join='exact' passes + aligned_1, aligned_2, aligned_3 = xr.align(xda_1, xda_2, xda_3, join="exact") + assert (aligned_1 == xda_1).all() + assert (aligned_2 == xda_2).all() + assert (aligned_3 == xda_3).all() + + # join='strict' fails because of non-matching dimensions' names + with pytest.raises( + ValueError, + match=re.escape( + "cannot align objects with join='strict' " + "because given objects do not share the same dimension names " + "([('y1', 'x1'), ('y2', 'x2'), ('y3', 'x3')])" + ), + ): + xr.align(xda_1, xda_2, xda_3, join="strict") + def test_broadcast_arrays(self) -> None: x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x") y = DataArray([1, 2], coords=[("b", [3, 4])], name="y") From ed0414a58ac43d14524c197b7bda59ecb34c7fcd Mon Sep 17 00:00:00 2001 From: eschalk Date: Sat, 10 Feb 2024 12:38:18 +0100 Subject: [PATCH 05/18] Added tests and use assert_identical --- xarray/tests/test_concat.py | 2 +- xarray/tests/test_dataarray.py | 70 +++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 0519a91516e..df5cec228c4 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1295,7 +1295,7 @@ def test_concat_join_coordinate_variables_non_asked_dims(): coords="different", join="outer", ) - assert all(actual_xds == expected_wrongly_concatenated_xds) + assert_identical(actual_xds, expected_wrongly_concatenated_xds) # Using join='strict' # A check similar to the one made on non-indexed dimensions regarding their sizes. 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 9bb857f6ed3..a16547f4f29 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3194,14 +3194,72 @@ def test_align_str_dtype(self) -> None: assert_identical(expected_b, actual_b) assert expected_b.x.dtype == actual_b.x.dtype - def test_align_exact_vs_strict_one_element(self) -> None: + def test_align_exact_vs_strict_same_dim_same_size(self) -> None: + xda_1 = xr.DataArray([1], dims="x") + xda_2 = xr.DataArray([1], dims="x") + + # join='exact' passes + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact") + assert_identical(aligned_1, xda_1) + assert_identical(aligned_2, xda_2) + + # join='strict' passes + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="strict") + assert_identical(aligned_1, xda_1) + assert_identical(aligned_2, xda_2) + + def test_align_exact_vs_strict_same_dim_differing_sizes(self) -> None: + xda_1 = xr.DataArray([1], dims="x") + xda_2 = xr.DataArray([1, 2], dims="x") + + # join='exact' fails because of non-matching sizes for the same 'x' dimension + with pytest.raises( + ValueError, + match=re.escape( + "cannot reindex or align along dimension 'x' because of " + "conflicting dimension sizes: {1, 2}" + ), + ): + xr.align(xda_1, xda_2, join="exact") + + # join='strict' fails because of non-matching sizes for the same 'x' dimension + with pytest.raises( + ValueError, + match=re.escape( + "cannot reindex or align along dimension 'x' because of " + "conflicting dimension sizes: {1, 2}" + ), + ): + xr.align(xda_1, xda_2, join="strict") + + def test_align_exact_vs_strict_differing_dims_same_sizes(self) -> None: xda_1 = xr.DataArray([1], dims="x1") xda_2 = xr.DataArray([1], dims="x2") # join='exact' passes aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact") - assert aligned_1 == xda_1 - assert aligned_2 == xda_2 + assert_identical(aligned_1, xda_1) + assert_identical(aligned_2, xda_2) + + # join='strict' fails because of 
non-matching dimensions' names + with pytest.raises( + ValueError, + match=re.escape( + "cannot align objects with join='strict' " + "because given objects do not share the same dimension names " + "([('x1',), ('x2',)])" + ), + ): + xr.align(xda_1, xda_2, join="strict") + + def test_align_exact_vs_strict_differing_dims_differing_sizes(self) -> None: + xda_1 = xr.DataArray([1], dims="x1") + xda_2 = xr.DataArray([1, 2], dims="x2") + + # join='exact' passes + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact") + assert_identical(aligned_1, xda_1) + assert_identical(aligned_2, xda_2) # join='strict' fails because of non-matching dimensions' names with pytest.raises( @@ -3221,9 +3279,9 @@ def test_align_exact_vs_strict_2d(self) -> None: # join='exact' passes aligned_1, aligned_2, aligned_3 = xr.align(xda_1, xda_2, xda_3, join="exact") - assert (aligned_1 == xda_1).all() - assert (aligned_2 == xda_2).all() - assert (aligned_3 == xda_3).all() + assert_identical(aligned_1, xda_1) + assert_identical(aligned_2, xda_2) + assert_identical(aligned_3, xda_3) # join='strict' fails because of non-matching dimensions' names with pytest.raises( From f46b19b54b3e1bda591085bfc920b3cc3783a958 Mon Sep 17 00:00:00 2001 From: eschalk Date: Wed, 14 Feb 2024 21:15:40 +0100 Subject: [PATCH 06/18] Added tests for join=exact --- xarray/tests/test_concat.py | 46 ++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index df5cec228c4..e4d9ba122aa 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1279,24 +1279,42 @@ def test_concat_join_coordinate_variables_non_asked_dims(): }, ) - # Using join='outer' expected_wrongly_concatenated_xds = Dataset( coords={ "x_center": ("x_center", [1, 2, 3, 4, 5, 6]), "x_outer": ("x_outer", [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5]), }, ) + + # Using join='outer' # Not using strict mode will allow the concatenation to surprisingly 
happen # even if `x_outer` sizes do not match actual_xds = concat( [ds1, ds2], + join="outer", dim="x_center", data_vars="different", coords="different", - join="outer", ) assert_identical(actual_xds, expected_wrongly_concatenated_xds) + # Using join='exact' + with pytest.raises( + ValueError, + match=re.escape( + "cannot align objects with join='exact' where " + "index/labels/sizes are not equal along these coordinates (dimensions): " + "'x_outer' ('x_outer',)" + ), + ): + concat( + [ds1, ds2], + join="exact", + dim="x_center", + data_vars="different", + coords="different", + ) + # Using join='strict' # A check similar to the one made on non-indexed dimensions regarding their sizes. with pytest.raises( @@ -1308,14 +1326,15 @@ def test_concat_join_coordinate_variables_non_asked_dims(): ): concat( [ds1, ds2], + join="strict", dim="x_center", data_vars="different", coords="different", - join="strict", ) -def test_concat_join_non_coordinate_variables(): +@pytest.mark.parametrize("join", ("outer", "exact", "strict")) +def test_concat_join_non_coordinate_variables(join: JoinOptions): ds1 = Dataset( data_vars={ "a": ("x_center", [1, 2, 3]), @@ -1330,7 +1349,7 @@ def test_concat_join_non_coordinate_variables(): }, ) - # Whether join='outer' or join='strict' modes are used, + # Whether join='outer' or join='exact' or join='strict' modes are used, # the concatenation fails because of the behavior disallowing alignment # of non-indexed dimensions (not attached to a coordinate variable). 
with pytest.raises( @@ -1342,23 +1361,8 @@ def test_concat_join_non_coordinate_variables(): ): concat( [ds1, ds2], + join=join, dim="x_center", data_vars="different", coords="different", - join="strict", - ) - - with pytest.raises( - ValueError, - match=( - r"cannot reindex or align along dimension 'x_outer' " - r"because of conflicting dimension sizes: {3, 4}" - ), - ): - concat( - [ds1, ds2], - dim="x_center", - data_vars="different", - coords="different", - join="outer", ) From 95295d1590858b39fd6c56ef0da086c9a3d9698b Mon Sep 17 00:00:00 2001 From: eschalk Date: Thu, 15 Feb 2024 21:54:21 +0100 Subject: [PATCH 07/18] Try replacing join=strict by broadcast=False --- doc/user-guide/options.rst | 2 +- doc/whats-new.rst | 3 +++ xarray/backends/api.py | 2 -- xarray/core/alignment.py | 27 +++++++++++-------- xarray/core/arithmetic.py | 2 ++ xarray/core/combine.py | 4 --- xarray/core/computation.py | 8 ++++++ xarray/core/concat.py | 2 -- xarray/core/dataarray.py | 5 +++- xarray/core/dataset.py | 5 +++- xarray/core/merge.py | 2 -- xarray/core/options.py | 44 +++++++++++++++++++------------ xarray/core/types.py | 2 +- xarray/tests/test_concat.py | 23 +++------------- xarray/tests/test_dataarray.py | 48 ++++++++++++++-------------------- 15 files changed, 90 insertions(+), 89 deletions(-) diff --git a/doc/user-guide/options.rst b/doc/user-guide/options.rst index 12844eccbe4..a260bfec6c5 100644 --- a/doc/user-guide/options.rst +++ b/doc/user-guide/options.rst @@ -16,7 +16,7 @@ Xarray offers a small number of configuration options through :py:func:`set_opti - ``display_max_rows`` - ``display_style`` -2. Control behaviour during operations: ``arithmetic_join``, ``keep_attrs``, ``use_bottleneck``. +2. Control behaviour during operations: ``arithmetic_broadcast``, ``arithmetic_join``, ``keep_attrs``, ``use_bottleneck``. 3. Control colormaps for plots:``cmap_divergent``, ``cmap_sequential``. 4. Aspects of file reading: ``file_cache_maxsize``, ``warn_on_unclosed_files``. 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 84f9c38b0ac..670c9c83642 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,9 @@ v2024.02.0 (unreleased) New Features ~~~~~~~~~~~~ +- Added the ability to control broadcasting for alignment, and a new global option ``arithmetic_broadcast`` + (:issue:`6806`, :pull:`8698`). + By `Etienne Schalk `_. - Added a simple `nbytes` representation in DataArrays and Dataset `repr`. (:issue:`8690`, :pull:`8702`). By `Etienne Schalk `_. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5cdb958b1b5..fcbd4d40bd7 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -914,8 +914,6 @@ def open_mfdataset( aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same - - "strict": similar to "exact", but less permissive. - The alignment fails if dimensions' names differ. attrs_file : str or path-like, optional Path of the file used to read global attributes from. By default global attributes are read from the first file provided, diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 3968a03345a..d8fdea92952 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -122,6 +122,7 @@ class Aligner(Generic[T_Alignable]): results: tuple[T_Alignable, ...] objects_matching_indexes: tuple[dict[MatchingIndexKey, Index], ...] 
join: str + broadcast: bool exclude_dims: frozenset[Hashable] exclude_vars: frozenset[Hashable] copy: bool @@ -142,6 +143,7 @@ def __init__( self, objects: Iterable[T_Alignable], join: str = "inner", + broadcast: bool = True, indexes: Mapping[Any, Any] | None = None, exclude_dims: str | Iterable[Hashable] = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), @@ -157,6 +159,7 @@ def __init__( if join not in get_args(JoinOptions): raise ValueError(f"invalid value for join: {join}") self.join = join + self.broadcast = broadcast self.copy = copy self.fill_value = fill_value @@ -273,12 +276,13 @@ def find_matching_indexes(self) -> None: self.all_indexes = all_indexes self.all_index_vars = all_index_vars - if self.join in ("override", "strict"): + if self.join == "override" or not self.broadcast: for dim_sizes in all_indexes_dim_sizes.values(): for dim, sizes in dim_sizes.items(): if len(sizes) > 1: raise ValueError( - f"cannot align objects with join={self.join!r} with matching indexes " + f"cannot align objects with join={self.join!r} or " + f"broadcast={self.broadcast!r} with matching indexes " f"along dimension {dim!r} that don't have the same size ({sizes!r})" ) @@ -488,17 +492,17 @@ def assert_unindexed_dim_sizes_equal(self) -> None: ) def assert_equal_dimension_names(self) -> None: - # Strict mode only allows objects having the exact same dimensions' names. - if not self.join == "strict": + # When broadcasting is disabled, only allows objects having the exact same dimensions' names. 
+ if self.broadcast: return unique_dims = set(tuple(o.sizes) for o in self.objects) all_objects_have_same_dims = len(unique_dims) == 1 if not all_objects_have_same_dims: raise ValueError( - f"cannot align objects with join='strict' " - f"because given objects do not share the same dimension names ({[tuple(o.sizes) for o in self.objects]!r}); " - f"try using join='exact' if you only care about equal indexes" + f"cannot align objects with broadcast=False " + f"because given objects do not share the same dimension names " + f"({[tuple(o.sizes) for o in self.objects]!r})." ) def override_indexes(self) -> None: @@ -600,7 +604,7 @@ def align(self) -> None: if self.join == "override": self.override_indexes() - elif not self.copy and (self.join in ("exact", "strict")): + elif not self.copy and self.join == "exact": self.results = self.objects else: self.reindex_all() @@ -702,6 +706,7 @@ def align( def align( *objects: T_Alignable, join: JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -734,9 +739,8 @@ def align( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - - "strict": similar to "exact", but less permissive. - The alignment fails if dimensions' names differ. - + broadcast : bool, default: True + If False, the alignment fails if the objects' dimension names differ. copy : bool, default: True If ``copy=True``, data in the return values is always copied. 
If ``copy=False`` and reindexing is unnecessary, or can be performed with @@ -900,6 +904,7 @@ def align( aligner = Aligner( objects, join=join, + broadcast=broadcast, copy=copy, indexes=indexes, exclude_dims=exclude, diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index de2fbe23d35..62280469514 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -81,6 +81,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): "`.values`)." ) + broadcast = OPTIONS["arithmetic_broadcast"] join = dataset_join = OPTIONS["arithmetic_join"] return apply_ufunc( @@ -89,6 +90,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): input_core_dims=((),) * ufunc.nin, output_core_dims=((),) * ufunc.nout, join=join, + broadcast=broadcast, dataset_join=dataset_join, dataset_fill_value=np.nan, kwargs=kwargs, diff --git a/xarray/core/combine.py b/xarray/core/combine.py index e5ec770e032..04af5260b62 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -448,8 +448,6 @@ def combine_nested( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - - "strict": similar to "exact", but less permissive. - The alignment fails if dimensions' names differ. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "drop" @@ -740,8 +738,6 @@ def combine_by_coords( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - - "strict": similar to "exact", but less permissive. - The alignment fails if dimensions' names differ. 
combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "no_conflicts" diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 68eae1566c1..bf7f79f2e9a 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -905,6 +905,7 @@ def apply_ufunc( output_core_dims: Sequence[Sequence] | None = ((),), exclude_dims: Set = frozenset(), vectorize: bool = False, + broadcast: bool = False, join: JoinOptions = "exact", dataset_join: str = "exact", dataset_fill_value: object = _NO_FILL_VALUE, @@ -969,6 +970,8 @@ def apply_ufunc( dimensions as input and vectorize it automatically with :py:func:`numpy.vectorize`. This option exists for convenience, but is almost always slower than supplying a pre-vectorized function. + broadcast : bool + The alignment fails if dimensions' names differ. join : {"outer", "inner", "left", "right", "exact"}, default: "exact" Method for joining the indexes of the passed objects along each dimension, and the variables of Dataset objects with mismatched @@ -1242,6 +1245,7 @@ def apply_ufunc( input_core_dims=input_core_dims, output_core_dims=output_core_dims, exclude_dims=exclude_dims, + broadcast=broadcast, join=join, dataset_join=dataset_join, dataset_fill_value=dataset_fill_value, @@ -1258,6 +1262,7 @@ def apply_ufunc( variables_vfunc, *args, signature=signature, + broadcast=broadcast, join=join, exclude_dims=exclude_dims, dataset_join=dataset_join, @@ -1271,6 +1276,7 @@ def apply_ufunc( variables_vfunc, *args, signature=signature, + broadcast=broadcast, join=join, exclude_dims=exclude_dims, keep_attrs=keep_attrs, @@ -1906,6 +1912,7 @@ def dot( subscripts = ",".join(subscripts_list) subscripts += "->..." 
+ "".join(dim_map[d] for d in output_core_dims[0]) + broadcast = OPTIONS["arithmetic_broadcast"] join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") if join != "exact": @@ -1920,6 +1927,7 @@ def dot( input_core_dims=input_core_dims, output_core_dims=output_core_dims, join=join, + broadcast=broadcast, dask="allowed", ) return result.transpose(*all_dims, missing_dims="ignore") diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 9fec652533a..677e798d955 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -146,8 +146,6 @@ def concat( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - - "strict": similar to "exact", but less permissive. - The alignment fails if dimensions' names differ. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "override" diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 46d97b36560..829a4455d2e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4671,8 +4671,11 @@ def _binary_op( if isinstance(other, (Dataset, GroupBy)): return NotImplemented if isinstance(other, DataArray): + broadcast = OPTIONS["arithmetic_broadcast"] align_type = OPTIONS["arithmetic_join"] - self, other = align(self, other, join=align_type, copy=False) + self, other = align( + self, other, join=align_type, broadcast=broadcast, copy=False + ) other_variable_or_arraylike: DaCompatible = getattr(other, "variable", other) other_coords = getattr(other, "coords", None) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3caa418e00e..e41ce8c0c30 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7582,8 +7582,11 @@ def _binary_op(self, other, f, reflexive=False, join=None) -> Dataset: if isinstance(other, GroupBy): return NotImplemented 
align_type = OPTIONS["arithmetic_join"] if join is None else join + broadcast = OPTIONS["arithmetic_broadcast"] if isinstance(other, (DataArray, Dataset)): - self, other = align(self, other, join=align_type, copy=False) + self, other = align( + self, other, join=align_type, broadcast=broadcast, copy=False + ) g = f if not reflexive else lambda x, y: f(y, x) ds = self._calculate_binary_op(g, other, join=align_type) keep_attrs = _get_keep_attrs(default=False) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 4b40fcc17b3..a689620e524 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -790,8 +790,6 @@ def merge( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - - "strict": similar to "exact", but less permissive. - The alignment fails if dimensions' names differ. fill_value : scalar or dict-like, optional Value to use for newly missing values. 
If a dict-like, maps diff --git a/xarray/core/options.py b/xarray/core/options.py index 077b84f6b87..82fd3afcf08 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING, Literal, TypedDict +from typing import TYPE_CHECKING, Any, Literal, TypedDict from xarray.core.utils import FrozenDict @@ -9,6 +9,7 @@ from matplotlib.colors import Colormap Options = Literal[ + "arithmetic_broadcast", "arithmetic_join", "cmap_divergent", "cmap_sequential", @@ -33,6 +34,7 @@ ] class T_Options(TypedDict): + arithmetic_broadcast: bool arithmetic_join: Literal["inner", "outer", "left", "right", "exact"] cmap_divergent: str | Colormap cmap_sequential: str | Colormap @@ -57,6 +59,7 @@ class T_Options(TypedDict): OPTIONS: T_Options = { + "arithmetic_broadcast": True, "arithmetic_join": "inner", "cmap_divergent": "RdBu_r", "cmap_sequential": "viridis", @@ -88,26 +91,35 @@ def _positive_integer(value: int) -> bool: return isinstance(value, int) and value > 0 +def _is_boolean(value: Any) -> bool: + return isinstance(value, bool) + + +def _is_boolean_or_default(value: Any) -> bool: + return value in (True, False, "default") + + _VALIDATORS = { + "arithmetic_broadcast": _is_boolean, "arithmetic_join": _JOIN_OPTIONS.__contains__, "display_max_rows": _positive_integer, "display_values_threshold": _positive_integer, "display_style": _DISPLAY_OPTIONS.__contains__, "display_width": _positive_integer, - "display_expand_attrs": lambda choice: choice in [True, False, "default"], - "display_expand_coords": lambda choice: choice in [True, False, "default"], - "display_expand_data_vars": lambda choice: choice in [True, False, "default"], - "display_expand_data": lambda choice: choice in [True, False, "default"], - "display_expand_indexes": lambda choice: choice in [True, False, "default"], - "display_default_indexes": lambda choice: choice in [True, False, "default"], - "enable_cftimeindex": 
lambda value: isinstance(value, bool), + "display_expand_attrs": _is_boolean_or_default, + "display_expand_coords": _is_boolean_or_default, + "display_expand_data_vars": _is_boolean_or_default, + "display_expand_data": _is_boolean_or_default, + "display_expand_indexes": _is_boolean_or_default, + "display_default_indexes": _is_boolean_or_default, + "enable_cftimeindex": _is_boolean, "file_cache_maxsize": _positive_integer, - "keep_attrs": lambda choice: choice in [True, False, "default"], - "use_bottleneck": lambda value: isinstance(value, bool), - "use_numbagg": lambda value: isinstance(value, bool), - "use_opt_einsum": lambda value: isinstance(value, bool), - "use_flox": lambda value: isinstance(value, bool), - "warn_for_unclosed_files": lambda value: isinstance(value, bool), + "keep_attrs": _is_boolean_or_default, + "use_bottleneck": _is_boolean, + "use_numbagg": _is_boolean, + "use_opt_einsum": _is_boolean, + "use_flox": _is_boolean, + "warn_for_unclosed_files": _is_boolean, } @@ -154,6 +166,8 @@ class set_options: Parameters ---------- + arithmetic_broadcast : bool, default: True + Whether to allow or disallow broadcasting arithmetic_join : {"inner", "outer", "left", "right", "exact"}, default: "inner" DataArray/Dataset alignment in binary operations: @@ -166,8 +180,6 @@ class set_options: - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - - "strict": similar to "exact", but less permissive. - The alignment fails if dimensions' names differ. cmap_divergent : str or matplotlib.colors.Colormap, default: "RdBu_r" Colormap to use for divergent data plots. 
If string, must be diff --git a/xarray/core/types.py b/xarray/core/types.py index 4e3906a87a2..410cf3de00b 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -198,7 +198,7 @@ def copy( Literal["drop", "identical", "no_conflicts", "drop_conflicts", "override"], Callable[..., Any], ] -JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override", "strict"] +JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override"] Interp1dOptions = Literal[ "linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial" diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index e4d9ba122aa..5e63d9d0e7f 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1287,7 +1287,7 @@ def test_concat_join_coordinate_variables_non_asked_dims(): ) # Using join='outer' - # Not using strict mode will allow the concatenation to surprisingly happen + # default's broadcast=True will allow the concatenation to surprisingly happen # even if `x_outer` sizes do not match actual_xds = concat( [ds1, ds2], @@ -1315,25 +1315,8 @@ def test_concat_join_coordinate_variables_non_asked_dims(): coords="different", ) - # Using join='strict' - # A check similar to the one made on non-indexed dimensions regarding their sizes. 
- with pytest.raises( - ValueError, - match=re.escape( - r"cannot align objects with join='strict' with matching indexes " - r"along dimension 'x_outer' that don't have the same size ({3, 4})" - ), - ): - concat( - [ds1, ds2], - join="strict", - dim="x_center", - data_vars="different", - coords="different", - ) - -@pytest.mark.parametrize("join", ("outer", "exact", "strict")) +@pytest.mark.parametrize("join", ("outer", "exact")) def test_concat_join_non_coordinate_variables(join: JoinOptions): ds1 = Dataset( data_vars={ @@ -1349,7 +1332,7 @@ def test_concat_join_non_coordinate_variables(join: JoinOptions): }, ) - # Whether join='outer' or join='exact' or join='strict' modes are used, + # Whether join='outer' or join='exact' modes are used, # the concatenation fails because of the behavior disallowing alignment # of non-indexed dimensions (not attached to a coordinate variable). with pytest.raises( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a16547f4f29..5b0bae1e56f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3194,25 +3194,22 @@ def test_align_str_dtype(self) -> None: assert_identical(expected_b, actual_b) assert expected_b.x.dtype == actual_b.x.dtype - def test_align_exact_vs_strict_same_dim_same_size(self) -> None: + def test_broadcast_on_vs_off_same_dim_same_size(self) -> None: xda_1 = xr.DataArray([1], dims="x") xda_2 = xr.DataArray([1], dims="x") - # join='exact' passes - aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact") + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact", broadcast=True) assert_identical(aligned_1, xda_1) assert_identical(aligned_2, xda_2) - # join='strict' passes - aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="strict") + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact", broadcast=False) assert_identical(aligned_1, xda_1) assert_identical(aligned_2, xda_2) - def test_align_exact_vs_strict_same_dim_differing_sizes(self) -> None: + 
def test_broadcast_on_vs_off_same_dim_differing_sizes(self) -> None: xda_1 = xr.DataArray([1], dims="x") xda_2 = xr.DataArray([1, 2], dims="x") - # join='exact' fails because of non-matching sizes for the same 'x' dimension with pytest.raises( ValueError, match=re.escape( @@ -3220,9 +3217,8 @@ def test_align_exact_vs_strict_same_dim_differing_sizes(self) -> None: "conflicting dimension sizes: {1, 2}" ), ): - xr.align(xda_1, xda_2, join="exact") + xr.align(xda_1, xda_2, join="exact", broadcast=True) - # join='strict' fails because of non-matching sizes for the same 'x' dimension with pytest.raises( ValueError, match=re.escape( @@ -3230,69 +3226,65 @@ def test_align_exact_vs_strict_same_dim_differing_sizes(self) -> None: "conflicting dimension sizes: {1, 2}" ), ): - xr.align(xda_1, xda_2, join="strict") + xr.align(xda_1, xda_2, join="exact", broadcast=False) - def test_align_exact_vs_strict_differing_dims_same_sizes(self) -> None: + def test_broadcast_on_vs_off_differing_dims_same_sizes(self) -> None: xda_1 = xr.DataArray([1], dims="x1") xda_2 = xr.DataArray([1], dims="x2") - # join='exact' passes - aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact") + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact", broadcast=True) assert_identical(aligned_1, xda_1) assert_identical(aligned_2, xda_2) - # join='strict' fails because of non-matching dimensions' names with pytest.raises( ValueError, match=re.escape( - "cannot align objects with join='strict' " + "cannot align objects with broadcast=False " "because given objects do not share the same dimension names " "([('x1',), ('x2',)])" ), ): - xr.align(xda_1, xda_2, join="strict") + xr.align(xda_1, xda_2, join="exact", broadcast=False) - def test_align_exact_vs_strict_differing_dims_differing_sizes(self) -> None: + def test_broadcast_on_vs_off_differing_dims_differing_sizes(self) -> None: xda_1 = xr.DataArray([1], dims="x1") xda_2 = xr.DataArray([1, 2], dims="x2") - # join='exact' passes - aligned_1, aligned_2 = 
xr.align(xda_1, xda_2, join="exact") + aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact", broadcast=True) assert_identical(aligned_1, xda_1) assert_identical(aligned_2, xda_2) - # join='strict' fails because of non-matching dimensions' names with pytest.raises( ValueError, match=re.escape( - "cannot align objects with join='strict' " + "cannot align objects with broadcast=False " "because given objects do not share the same dimension names " "([('x1',), ('x2',)])" ), ): - xr.align(xda_1, xda_2, join="strict") + xr.align(xda_1, xda_2, join="exact", broadcast=False) - def test_align_exact_vs_strict_2d(self) -> None: + def test_broadcast_on_vs_off_2d(self) -> None: xda_1 = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y1", "x1")) xda_2 = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y2", "x2")) xda_3 = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y3", "x3")) - # join='exact' passes - aligned_1, aligned_2, aligned_3 = xr.align(xda_1, xda_2, xda_3, join="exact") + aligned_1, aligned_2, aligned_3 = xr.align( + xda_1, xda_2, xda_3, join="exact", broadcast=True + ) assert_identical(aligned_1, xda_1) assert_identical(aligned_2, xda_2) assert_identical(aligned_3, xda_3) - # join='strict' fails because of non-matching dimensions' names with pytest.raises( ValueError, match=re.escape( - "cannot align objects with join='strict' " + "cannot align objects with broadcast=False " "because given objects do not share the same dimension names " "([('y1', 'x1'), ('y2', 'x2'), ('y3', 'x3')])" ), ): - xr.align(xda_1, xda_2, xda_3, join="strict") + xr.align(xda_1, xda_2, xda_3, join="exact", broadcast=False) def test_broadcast_arrays(self) -> None: x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x") From d84c688ba3a2998b07e02465f9caa94c869e2830 Mon Sep 17 00:00:00 2001 From: eschalk Date: Sat, 17 Feb 2024 14:56:58 +0100 Subject: [PATCH 08/18] More broadcasts --- xarray/core/alignment.py | 1 + xarray/core/computation.py | 4 ++++ xarray/core/merge.py | 16 ++++++++++++++-- 
xarray/tests/__init__.py | 1 + xarray/tests/test_dataarray.py | 22 ++++++++++++++++++++++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index d8fdea92952..d870bff1364 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -917,6 +917,7 @@ def align( def deep_align( objects: Iterable[Any], join: JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), diff --git a/xarray/core/computation.py b/xarray/core/computation.py index bf7f79f2e9a..2e2aa8bafa4 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -282,6 +282,7 @@ def apply_dataarray_vfunc( *args, signature: _UFuncSignature, join: JoinOptions = "inner", + broadcast: bool = True, exclude_dims=frozenset(), keep_attrs="override", ) -> tuple[DataArray, ...] | DataArray: @@ -295,6 +296,7 @@ def apply_dataarray_vfunc( deep_align( args, join=join, + broadcast=broadcast, copy=False, exclude=exclude_dims, raise_on_invalid=False, @@ -494,6 +496,7 @@ def apply_dataset_vfunc( signature: _UFuncSignature, join="inner", dataset_join="exact", + broadcast: bool = True, fill_value=_NO_FILL_VALUE, exclude_dims=frozenset(), keep_attrs="override", @@ -518,6 +521,7 @@ def apply_dataset_vfunc( deep_align( args, join=join, + broadcast=broadcast, copy=False, exclude=exclude_dims, raise_on_invalid=False, diff --git a/xarray/core/merge.py b/xarray/core/merge.py index a689620e524..9989deb23c8 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -541,6 +541,7 @@ def merge_coords( objects: Iterable[CoercibleMapping], compat: CompatOptions = "minimal", join: JoinOptions = "outer", + broadcast: bool = True, priority_arg: int | None = None, indexes: Mapping[Any, Index] | None = None, fill_value: object = dtypes.NA, @@ -554,7 +555,12 @@ def merge_coords( _assert_compat_valid(compat) coerced = coerce_pandas_values(objects) aligned = deep_align( - coerced, 
join=join, copy=False, indexes=indexes, fill_value=fill_value + coerced, + join=join, + broadcast=broadcast, + copy=False, + indexes=indexes, + fill_value=fill_value, ) collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) @@ -647,6 +653,7 @@ def merge_core( objects: Iterable[CoercibleMapping], compat: CompatOptions = "broadcast_equals", join: JoinOptions = "outer", + broadcast: bool = True, combine_attrs: CombineAttrsOptions = "override", priority_arg: int | None = None, explicit_coords: Iterable[Hashable] | None = None, @@ -709,7 +716,12 @@ def merge_core( coerced = coerce_pandas_values(objects) aligned = deep_align( - coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value + coerced, + join=join, + broadcast=broadcast, + copy=False, + indexes=indexes, + fill_value=fill_value, ) for pos, obj in skip_align_objs: diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 207caba48f0..e69d3b2c97f 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -89,6 +89,7 @@ def _importorskip( has_pynio, requires_pynio = _importorskip("Nio") has_cftime, requires_cftime = _importorskip("cftime") has_dask, requires_dask = _importorskip("dask") +has_dask_expr, requires_dask_expr = _importorskip("dask_expr") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_rasterio, requires_rasterio = _importorskip("rasterio") has_zarr, requires_zarr = _importorskip("zarr") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5b0bae1e56f..c4fc3904576 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -51,6 +51,7 @@ requires_bottleneck, requires_cupy, requires_dask, + requires_dask_expr, requires_iris, requires_numexpr, requires_pint, @@ -3246,6 +3247,26 @@ def test_broadcast_on_vs_off_differing_dims_same_sizes(self) -> None: ): xr.align(xda_1, xda_2, join="exact", 
broadcast=False) + def test_broadcast_on_vs_off_global_option(self) -> None: + xda_1 = xr.DataArray([1], dims="x1") + xda_2 = xr.DataArray([1], dims="x2") + + with xr.set_options(arithmetic_broadcast=True): + expected_xda = xr.DataArray([[1.0]], dims=("x1", "x2")) + actual_xda = xda_1 / xda_2 + assert_identical(expected_xda, actual_xda) + + with xr.set_options(arithmetic_broadcast=False): + with pytest.raises( + ValueError, + match=re.escape( + "cannot align objects with broadcast=False " + "because given objects do not share the same dimension names " + "([('x1',), ('x2',)])" + ), + ): + xda_1 / xda_2 + def test_broadcast_on_vs_off_differing_dims_differing_sizes(self) -> None: xda_1 = xr.DataArray([1], dims="x1") xda_2 = xr.DataArray([1, 2], dims="x2") @@ -3464,6 +3485,7 @@ def test_to_dataframe_0length(self) -> None: assert len(actual) == 0 assert_array_equal(actual.index.names, list("ABC")) + @requires_dask_expr @requires_dask def test_to_dask_dataframe(self) -> None: arr_np = np.arange(3 * 4).reshape(3, 4) From a7148d6ff2d0c0fb47efdb3f25a92f6b063a64ea Mon Sep 17 00:00:00 2001 From: eschalk Date: Sat, 17 Feb 2024 15:48:49 +0100 Subject: [PATCH 09/18] CI failed: mypy + warnings --- .gitignore | 3 +++ xarray/core/alignment.py | 6 ++++++ xarray/tests/__init__.py | 8 +++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 21c18c17ff7..e8a97ed5328 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,6 @@ doc/team-panel.txt doc/external-examples-gallery.txt doc/notebooks-examples-gallery.txt doc/videos-gallery.txt + +# MyPy Report +mypy_report/ diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index d870bff1364..ffb91c5e3fc 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -623,6 +623,7 @@ def align( /, *, join: JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -637,6 +638,7 @@ def align( /, *, join: 
JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -652,6 +654,7 @@ def align( /, *, join: JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -668,6 +671,7 @@ def align( /, *, join: JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -685,6 +689,7 @@ def align( /, *, join: JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), @@ -696,6 +701,7 @@ def align( def align( *objects: T_Alignable, join: JoinOptions = "inner", + broadcast: bool = True, copy: bool = True, indexes=None, exclude: str | Iterable[Hashable] = frozenset(), diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index d521c998b97..2e6e638f5b1 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -89,7 +89,13 @@ def _importorskip( has_pynio, requires_pynio = _importorskip("Nio") has_cftime, requires_cftime = _importorskip("cftime") has_dask, requires_dask = _importorskip("dask") -has_dask_expr, requires_dask_expr = _importorskip("dask_expr") +with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="The current Dask DataFrame implementation is deprecated.", + category=DeprecationWarning, + ) + has_dask_expr, requires_dask_expr = _importorskip("dask_expr") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_rasterio, requires_rasterio = _importorskip("rasterio") has_zarr, requires_zarr = _importorskip("zarr") From 3a4e81aee38c1974e3fe588a20ac36ad2db01bca Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 18 Feb 2024 23:21:25 +0100 Subject: [PATCH 10/18] Review - revert useless change --- xarray/core/alignment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/alignment.py 
b/xarray/core/alignment.py index ffb91c5e3fc..c045e8392ff 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -604,7 +604,7 @@ def align(self) -> None: if self.join == "override": self.override_indexes() - elif not self.copy and self.join == "exact": + elif self.join == "exact" and not self.copy: self.results = self.objects else: self.reindex_all() From 4247a868906b6fd8618f81e335bb46c9ed2a26d6 Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 18 Feb 2024 23:22:27 +0100 Subject: [PATCH 11/18] Review - missing passed down param, useless newline --- xarray/core/alignment.py | 1 + xarray/core/combine.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index c045e8392ff..545873f8372 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -984,6 +984,7 @@ def is_alignable(obj): aligned = align( *targets, join=join, + broadcast=broadcast, copy=copy, indexes=indexes, exclude=exclude, diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 04af5260b62..5cb0a3417fa 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -448,7 +448,6 @@ def combine_nested( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. 
- combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ "override"} or callable, default: "drop" A callable or a string indicating how to combine attrs of the objects being From 9db7db1329e3c4b47e9346d052e697f82e053a7d Mon Sep 17 00:00:00 2001 From: Etienne Schalk <45271239+etienneschalk@users.noreply.github.com> Date: Sun, 18 Feb 2024 23:32:03 +0100 Subject: [PATCH 12/18] Apply suggestions from code review Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 3 --- xarray/core/alignment.py | 11 ++++++----- xarray/tests/test_dataarray.py | 16 ++++++---------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c629e1763cf..4e4e3cc3b9f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,9 +32,6 @@ New Features - Allow negative frequency strings (e.g. ``"-1YE"``). These strings are for example used in :py:func:`date_range`, and :py:func:`cftime_range` (:pull:`8651`). By `Mathias Hauser `_. -- Added a ``join='strict'`` mode for ``Aligner.align`` and related classes. - (:issue:`7132`, :issue:`8230`). - By `Etienne Schalk `_. - Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. 
- Xarray now defers to flox's `heuristics `_ diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index ffb91c5e3fc..451bee9ba04 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -281,9 +281,9 @@ def find_matching_indexes(self) -> None: for dim, sizes in dim_sizes.items(): if len(sizes) > 1: raise ValueError( - f"cannot align objects with join={self.join!r} or " - f"broadcast={self.broadcast!r} with matching indexes " - f"along dimension {dim!r} that don't have the same size ({sizes!r})" + f"cannot align objects" + f"with indexes " + f"along dimension {dim!r} that don't have the same size ({sizes!r}) when {message} " ) def find_matching_unindexed_dims(self) -> None: @@ -745,8 +745,9 @@ def align( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - broadcast : bool - The alignment fails if dimensions' names differ. + broadcast : bool, optional + Disallow automatic broadcasting of all objects along dimensions that are present in some but not all objects. + If False, this will raise an error when all objects do *not* have the same dimensions. copy : bool, default: True If ``copy=True``, data in the return values is always copied. 
If ``copy=False`` and reindexing is unnecessary, or can be performed with diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 646be13a68b..a7876f98d69 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3204,17 +3204,13 @@ def test_align_str_dtype(self) -> None: assert_identical(expected_b, actual_b) assert expected_b.x.dtype == actual_b.x.dtype - def test_broadcast_on_vs_off_same_dim_same_size(self) -> None: - xda_1 = xr.DataArray([1], dims="x") - xda_2 = xr.DataArray([1], dims="x") + @pytest.mark.parametrize("broadcast", [True, False]) + def test_broadcast_on_vs_off_same_dim_same_size(self, broadcast) -> None: + xda = xr.DataArray([1], dims="x") - aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact", broadcast=True) - assert_identical(aligned_1, xda_1) - assert_identical(aligned_2, xda_2) - - aligned_1, aligned_2 = xr.align(xda_1, xda_2, join="exact", broadcast=False) - assert_identical(aligned_1, xda_1) - assert_identical(aligned_2, xda_2) + aligned_1, aligned_2 = xr.align(xda, xda, join="exact", broadcast=broadcast) + assert_identical(aligned_1, xda) + assert_identical(aligned_2, xda) def test_broadcast_on_vs_off_same_dim_differing_sizes(self) -> None: xda_1 = xr.DataArray([1], dims="x") From e4d0a84e5cad25ad2fad5e3af3d85fa42dcc4b45 Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 18 Feb 2024 23:34:41 +0100 Subject: [PATCH 13/18] review - error message --- xarray/core/alignment.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 1e4b177cf73..10f4f61652a 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -280,10 +280,15 @@ def find_matching_indexes(self) -> None: for dim_sizes in all_indexes_dim_sizes.values(): for dim, sizes in dim_sizes.items(): if len(sizes) > 1: + message = ( + "join='override'" + if self.join == "override" + else "broadcast=False" + ) raise ValueError( - f"cannot 
align objects" - f"with indexes " - f"along dimension {dim!r} that don't have the same size ({sizes!r}) when {message} " + f"cannot align objects with indexes " + f"along dimension {dim!r} that don't have the same size " + f"({sizes!r}) when {message}" ) def find_matching_unindexed_dims(self) -> None: From fe56de83f2442da291a87dc6fd2c4739bcb9bab4 Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 18 Feb 2024 23:44:59 +0100 Subject: [PATCH 14/18] Review - Use dims --- xarray/core/alignment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 10f4f61652a..bff68936591 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -501,13 +501,13 @@ def assert_equal_dimension_names(self) -> None: if self.broadcast: return - unique_dims = set(tuple(o.sizes) for o in self.objects) + unique_dims = set(o.dims for o in self.objects) all_objects_have_same_dims = len(unique_dims) == 1 if not all_objects_have_same_dims: raise ValueError( f"cannot align objects with broadcast=False " f"because given objects do not share the same dimension names " - f"({[tuple(o.sizes) for o in self.objects]!r})." + f"({[tuple(o.dims) for o in self.objects]!r})." 
) def override_indexes(self) -> None: From e4b0cd82e263cefabd61e6b835b41149baed30bb Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 18 Feb 2024 23:51:22 +0100 Subject: [PATCH 15/18] param test --- xarray/core/alignment.py | 2 +- xarray/tests/test_dataarray.py | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index bff68936591..d3bf15eb340 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -501,7 +501,7 @@ def assert_equal_dimension_names(self) -> None: if self.broadcast: return - unique_dims = set(o.dims for o in self.objects) + unique_dims = set(tuple(o.dims) for o in self.objects) all_objects_have_same_dims = len(unique_dims) == 1 if not all_objects_have_same_dims: raise ValueError( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a7876f98d69..0edb97430ae 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3205,14 +3205,15 @@ def test_align_str_dtype(self) -> None: assert expected_b.x.dtype == actual_b.x.dtype @pytest.mark.parametrize("broadcast", [True, False]) - def test_broadcast_on_vs_off_same_dim_same_size(self, broadcast) -> None: + def test_broadcast_on_vs_off_same_dim_same_size(self, broadcast: bool) -> None: xda = xr.DataArray([1], dims="x") aligned_1, aligned_2 = xr.align(xda, xda, join="exact", broadcast=broadcast) assert_identical(aligned_1, xda) assert_identical(aligned_2, xda) - def test_broadcast_on_vs_off_same_dim_differing_sizes(self) -> None: + @pytest.mark.parametrize("broadcast", [True, False]) + def test_broadcast_on_vs_off_same_dim_differing_sizes(self: bool) -> None: xda_1 = xr.DataArray([1], dims="x") xda_2 = xr.DataArray([1, 2], dims="x") @@ -3223,16 +3224,7 @@ def test_broadcast_on_vs_off_same_dim_differing_sizes(self) -> None: "conflicting dimension sizes: {1, 2}" ), ): - xr.align(xda_1, xda_2, join="exact", broadcast=True) - - with pytest.raises( - ValueError, - 
match=re.escape( - "cannot reindex or align along dimension 'x' because of " - "conflicting dimension sizes: {1, 2}" - ), - ): - xr.align(xda_1, xda_2, join="exact", broadcast=False) + xr.align(xda_1, xda_2, join="exact", broadcast=broadcast) def test_broadcast_on_vs_off_differing_dims_same_sizes(self) -> None: xda_1 = xr.DataArray([1], dims="x1") From 26d731152fd05490fc35be4533b08c25c3ba2c0f Mon Sep 17 00:00:00 2001 From: eschalk Date: Mon, 19 Feb 2024 00:02:58 +0100 Subject: [PATCH 16/18] Remove faulty line whats-new --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6b0daba9d28..01c63e889b3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,7 +34,6 @@ New Features - Added the ability to control broadcasting for alignment, and new gloal option ``arithmetic_broadcast`` (:issue:`6806`, :pull:`8698`). By `Etienne Schalk `_. -- Added a simple `nbytes` representation in DataArrays and Dataset `repr`. - Added a simple ``nbytes`` representation in DataArrays and Dataset ``repr``. (:issue:`8690`, :pull:`8702`). By `Etienne Schalk `_. From df8fd3a8e0c1b2c96b807621be696218a9f98a83 Mon Sep 17 00:00:00 2001 From: eschalk Date: Mon, 19 Feb 2024 00:07:32 +0100 Subject: [PATCH 17/18] Review - remove broadcast from apply_ufunc --- xarray/core/computation.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 0760d466641..d3406264b75 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -909,7 +909,6 @@ def apply_ufunc( output_core_dims: Sequence[Sequence] | None = ((),), exclude_dims: Set = frozenset(), vectorize: bool = False, - broadcast: bool = False, join: JoinOptions = "exact", dataset_join: str = "exact", dataset_fill_value: object = _NO_FILL_VALUE, @@ -974,8 +973,6 @@ def apply_ufunc( dimensions as input and vectorize it automatically with :py:func:`numpy.vectorize`. 
This option exists for convenience, but is almost always slower than supplying a pre-vectorized function. - broadcast : bool - The alignment fails if dimensions' names differ. join : {"outer", "inner", "left", "right", "exact"}, default: "exact" Method for joining the indexes of the passed objects along each dimension, and the variables of Dataset objects with mismatched @@ -1249,7 +1246,6 @@ def apply_ufunc( input_core_dims=input_core_dims, output_core_dims=output_core_dims, exclude_dims=exclude_dims, - broadcast=broadcast, join=join, dataset_join=dataset_join, dataset_fill_value=dataset_fill_value, @@ -1266,7 +1262,6 @@ def apply_ufunc( variables_vfunc, *args, signature=signature, - broadcast=broadcast, join=join, exclude_dims=exclude_dims, dataset_join=dataset_join, @@ -1280,7 +1275,6 @@ def apply_ufunc( variables_vfunc, *args, signature=signature, - broadcast=broadcast, join=join, exclude_dims=exclude_dims, keep_attrs=keep_attrs, From 651289f6b1b44952c8ead6b308df47e8ca401a46 Mon Sep 17 00:00:00 2001 From: eschalk Date: Mon, 19 Feb 2024 00:12:23 +0100 Subject: [PATCH 18/18] Re-add missing line --- xarray/backends/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e06245d4b29..d3026a535e2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -943,6 +943,7 @@ def open_mfdataset( aligned are not equal - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. attrs_file : str or path-like, optional Path of the file used to read global attributes from. By default global attributes are read from the first file provided,