Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate VariableArithmetic to NamedArrayArithmetic #8244

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion xarray/backends/file_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@

from xarray.backends.locks import acquire
from xarray.backends.lru_cache import LRUCache
from xarray.core import utils
from xarray.core.options import OPTIONS
from xarray.namedarray import utils

# Global cache for storing open files.
FILE_CACHE: LRUCache[Any, io.IOBase] = LRUCache(
Expand Down
3 changes: 2 additions & 1 deletion xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import numpy as np

from xarray.core import dtypes, duck_array_ops, utils
from xarray.core import dtypes, duck_array_ops
from xarray.core.alignment import align, deep_align
from xarray.core.common import zeros_like
from xarray.core.duck_array_ops import datetime_to_numeric
Expand All @@ -26,6 +26,7 @@
from xarray.core.types import Dims, T_DataArray
from xarray.core.utils import is_dict_like, is_scalar
from xarray.core.variable import Variable
from xarray.namedarray import utils

if TYPE_CHECKING:
from xarray.core.coordinates import Coordinates
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
from xarray.core.types import DataVars, Self, T_DataArray, T_Xarray
from xarray.core.utils import (
Frozen,
ReprObject,
either_dict_or_kwargs,
emit_user_level_warning,
)
from xarray.core.variable import Variable, as_variable, calculate_dimensions
from xarray.namedarray.utils import ReprObject

if TYPE_CHECKING:
from xarray.core.common import DataWithCoords
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
from xarray.core.utils import (
Default,
HybridMappingProxy,
ReprObject,
_default,
either_dict_or_kwargs,
emit_user_level_warning,
Expand All @@ -64,6 +63,7 @@
as_compatible_data,
as_variable,
)
from xarray.namedarray.utils import ReprObject
from xarray.plot.accessor import DataArrayPlotAccessor
from xarray.plot.utils import _get_units_from_attrs

Expand Down
187 changes: 1 addition & 186 deletions xarray/core/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,188 +1,3 @@
from __future__ import annotations

import functools

import numpy as np

from xarray.core import utils

# Sentinel used to indicate "a dtype-appropriate NA (missing) value".
# Its repr is "<NA>".
NA = utils.ReprObject("<NA>")


@functools.total_ordering
class AlwaysGreaterThan:
    """Sentinel whose ``>`` comparison is always true.

    ``functools.total_ordering`` derives the remaining rich comparisons
    from ``__gt__`` and ``__eq__``.
    """

    def __eq__(self, other):
        # Equal only to other instances of this same sentinel type.
        own_type = type(self)
        return isinstance(other, own_type)

    def __gt__(self, other):
        # Greater than anything, regardless of the other operand.
        return True


@functools.total_ordering
class AlwaysLessThan:
    """Sentinel whose ``<`` comparison is always true.

    ``functools.total_ordering`` derives the remaining rich comparisons
    from ``__lt__`` and ``__eq__``.
    """

    def __eq__(self, other):
        # Equal only to other instances of this same sentinel type.
        own_type = type(self)
        return isinstance(other, own_type)

    def __lt__(self, other):
        # Less than anything, regardless of the other operand.
        return True


# Object-dtype equivalents of np.inf and -np.inf:
# INF is an AlwaysGreaterThan instance, NINF an AlwaysLessThan instance.
INF = AlwaysGreaterThan()
NINF = AlwaysLessThan()


# Pairs of scalar type families that, when both are present among the inputs,
# are promoted to object dtype instead of following NumPy's own type-promotion
# rules. These promotion rules match pandas instead. For reference, see the
# NumPy type hierarchy:
# https://numpy.org/doc/stable/reference/arrays.scalars.html
PROMOTE_TO_OBJECT: tuple[tuple[type[np.generic], type[np.generic]], ...] = (
(np.number, np.character), # numpy promotes to character
(np.bool_, np.character), # numpy promotes to character
(np.bytes_, np.str_), # numpy promotes to unicode
)


def maybe_promote(dtype):
    """Simpler equivalent of pandas.core.common._maybe_promote

    Parameters
    ----------
    dtype : np.dtype
        Anything accepted by ``np.dtype`` (a dtype instance, a scalar type
        such as ``np.int16``, or a dtype string) is also supported.

    Returns
    -------
    dtype : Promoted dtype that can hold missing values.
    fill_value : Valid missing value for the promoted dtype.
    """
    # Normalize up front: ``.itemsize`` in the integer branch is only a usable
    # number on an ``np.dtype`` instance, not on a scalar type or a string.
    dtype = np.dtype(dtype)

    # N.B. these casting rules should match pandas
    if np.issubdtype(dtype, np.floating):
        fill_value = np.nan
    elif np.issubdtype(dtype, np.timedelta64):
        # See https://github.com/numpy/numpy/issues/10685
        # np.timedelta64 is a subclass of np.integer
        # Check np.timedelta64 before np.integer
        fill_value = np.timedelta64("NaT")
    elif np.issubdtype(dtype, np.integer):
        # Integers cannot hold NaN; small ints fit in float32, the rest
        # are widened to float64.
        dtype = np.float32 if dtype.itemsize <= 2 else np.float64
        fill_value = np.nan
    elif np.issubdtype(dtype, np.complexfloating):
        fill_value = np.nan + np.nan * 1j
    elif np.issubdtype(dtype, np.datetime64):
        fill_value = np.datetime64("NaT")
    else:
        # Everything else falls back to object dtype.
        dtype = object
        fill_value = np.nan

    dtype = np.dtype(dtype)
    # Cast the fill value to the scalar type of the promoted dtype.
    fill_value = dtype.type(fill_value)
    return dtype, fill_value


# dtypes of the datetime64 and timedelta64 "NaT" scalars.
NAT_TYPES = {np.datetime64("NaT").dtype, np.timedelta64("NaT").dtype}


def get_fill_value(dtype):
    """Return the missing-value marker that ``maybe_promote`` picks for a dtype.

    Parameters
    ----------
    dtype : np.dtype

    Returns
    -------
    fill_value : Missing value corresponding to this dtype.
    """
    # maybe_promote returns (promoted dtype, fill value); only the latter is needed.
    promoted = maybe_promote(dtype)
    return promoted[1]


def get_pos_infinity(dtype, max_for_int=False):
    """Return an appropriate positive infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    max_for_int : bool
        Return np.iinfo(dtype).max instead of np.inf

    Returns
    -------
    fill_value : positive infinity value corresponding to this dtype.
    """
    scalar_type = dtype.type

    if issubclass(scalar_type, np.floating):
        return np.inf

    if issubclass(scalar_type, np.integer):
        # Integers have no infinity; optionally substitute the largest value.
        return np.iinfo(dtype).max if max_for_int else np.inf

    if issubclass(scalar_type, np.complexfloating):
        return np.inf + 1j * np.inf

    # Any other dtype gets the module's INF sentinel.
    return INF


def get_neg_infinity(dtype, min_for_int=False):
    """Return an appropriate negative infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    min_for_int : bool
        Return np.iinfo(dtype).min instead of -np.inf

    Returns
    -------
    fill_value : negative infinity value corresponding to this dtype.
    """
    scalar_type = dtype.type

    if issubclass(scalar_type, np.floating):
        return -np.inf

    if issubclass(scalar_type, np.integer):
        # Integers have no infinity; optionally substitute the smallest value.
        return np.iinfo(dtype).min if min_for_int else -np.inf

    if issubclass(scalar_type, np.complexfloating):
        return -np.inf - 1j * np.inf

    # Any other dtype gets the module's NINF sentinel.
    return NINF


def is_datetime_like(dtype):
    """Return True if ``dtype`` is a numpy datetime64 or timedelta64 dtype."""
    time_kinds = (np.datetime64, np.timedelta64)
    return any(np.issubdtype(dtype, kind) for kind in time_kinds)


def result_type(
    *arrays_and_dtypes: np.typing.ArrayLike | np.typing.DTypeLike,
) -> np.dtype:
    """Like np.result_type, but with type promotion rules matching pandas.

    Examples of changed behavior:
    number + string -> object (not string)
    bytes + unicode -> object (not unicode)

    Parameters
    ----------
    *arrays_and_dtypes : list of arrays and dtypes
        The dtype is extracted from both numpy and dask arrays.

    Returns
    -------
    numpy.dtype for the result.
    """
    # Scalar type of each input taken on its own.
    scalar_types = {np.result_type(item).type for item in arrays_and_dtypes}

    def _any_subclass_of(base):
        return any(issubclass(candidate, base) for candidate in scalar_types)

    # If both members of a listed pair are represented, use object dtype.
    for left, right in PROMOTE_TO_OBJECT:
        if _any_subclass_of(left) and _any_subclass_of(right):
            return np.dtype(object)

    # Otherwise defer to NumPy's own promotion rules.
    return np.result_type(*arrays_and_dtypes)
from xarray.namedarray.dtypes import * # noqa: F401, F403
104 changes: 1 addition & 103 deletions xarray/core/pycompat.py
Original file line number Diff line number Diff line change
@@ -1,103 +1 @@
from __future__ import annotations

from importlib import import_module
from types import ModuleType
from typing import TYPE_CHECKING, Any, Literal

import numpy as np
from packaging.version import Version

from xarray.core.utils import is_duck_array, is_scalar, module_available

# Types treated as integers: Python's int and any numpy integer scalar.
integer_types = (int, np.integer)

# Aliases used only in annotations, so they are defined for type checkers only.
if TYPE_CHECKING:
    ModType = Literal["dask", "pint", "cupy", "sparse", "cubed"]
    DuckArrayTypes = tuple[type[Any], ...]  # TODO: improve this? maybe Generic


class DuckArrayModule:
    """
    Solely for internal isinstance and version checks.

    Imports the named library on construction and records the module, its
    version and its array class. If the import or attribute lookup fails,
    the instance is marked unavailable with version "0.0.0" and no types.

    Motivated by having to only import pint when required (as pint currently imports xarray)
    https://github.com/pydata/xarray/pull/5561#discussion_r664815718
    """

    module: ModuleType | None
    version: Version
    type: DuckArrayTypes
    available: bool

    def __init__(self, mod: ModType) -> None:
        # Name of the array class on the top-level module, for the libraries
        # that expose it there. dask keeps its class in ``dask.array``.
        array_attr_by_mod = {
            "pint": "Quantity",
            "cupy": "ndarray",
            "sparse": "SparseArray",
            "cubed": "Array",
        }

        loaded: ModuleType | None
        loaded_version: Version
        array_types: DuckArrayTypes
        try:
            loaded = import_module(mod)
            loaded_version = Version(loaded.__version__)

            if mod == "dask":
                array_types = (import_module("dask.array").Array,)
            elif mod in array_attr_by_mod:
                array_types = (getattr(loaded, array_attr_by_mod[mod]),)
            else:
                raise NotImplementedError

        except (ImportError, AttributeError):  # pragma: no cover
            loaded, loaded_version, array_types = None, Version("0.0.0"), ()

        self.module = loaded
        self.version = loaded_version
        self.type = array_types
        self.available = loaded is not None


_cached_duck_array_modules: dict[ModType, DuckArrayModule] = {}


def _get_cached_duck_array_module(mod: ModType) -> DuckArrayModule:
    """Return the ``DuckArrayModule`` for ``mod``, building it on first use."""
    cached = _cached_duck_array_modules.get(mod)
    if cached is None:
        # First request for this library: build the record and remember it.
        cached = DuckArrayModule(mod)
        _cached_duck_array_modules[mod] = cached
    return cached


def array_type(mod: ModType) -> DuckArrayTypes:
    """Quick wrapper to get the array class of the module."""
    duck_module = _get_cached_duck_array_module(mod)
    return duck_module.type


def mod_version(mod: ModType) -> Version:
    """Quick wrapper to get the version of the module."""
    duck_module = _get_cached_duck_array_module(mod)
    return duck_module.version


def is_dask_collection(x):
    """Return dask's ``is_dask_collection(x)``, or False if dask is unavailable."""
    if not module_available("dask"):
        return False

    # Imported only once dask is known to be available.
    from dask.base import is_dask_collection as _dask_is_dask_collection

    return _dask_is_dask_collection(x)


def is_duck_dask_array(x):
    """Return a truthy value if ``x`` is both a duck array and a dask collection."""
    duck = is_duck_array(x)
    # Short-circuits: the dask check only runs for duck arrays.
    return duck and is_dask_collection(x)


def is_chunked_array(x) -> bool:
    """Return whether ``x`` is a dask-backed duck array or a duck array with ``chunks``."""
    dask_backed = is_duck_dask_array(x)
    # Otherwise fall back to any duck array exposing a ``chunks`` attribute.
    return dask_backed or (is_duck_array(x) and hasattr(x, "chunks"))


def is_0d_dask_array(x):
    """Return a truthy value if ``x`` is a duck dask array and ``is_scalar(x)`` holds."""
    dask_backed = is_duck_dask_array(x)
    # Short-circuits: the scalar check only runs for dask-backed arrays.
    return dask_backed and is_scalar(x)
from xarray.namedarray.pycompat import * # noqa: F401, F403
25 changes: 0 additions & 25 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,31 +605,6 @@ def __repr__(self: Any) -> str:
return f"{type(self).__name__}(array={self.array!r})"


class ReprObject:
    """Object that prints as the given value, for use with sentinel values.

    Two instances are equal, and hash alike, when they wrap the same string.
    """

    __slots__ = ("_value",)

    def __init__(self, value: str):
        self._value = value

    def __repr__(self) -> str:
        return self._value

    def __eq__(self, other) -> bool:
        # Only another ReprObject wrapping the same string compares equal.
        return isinstance(other, ReprObject) and self._value == other._value

    def __hash__(self) -> int:
        identity = (type(self), self._value)
        return hash(identity)

    def __dask_tokenize__(self):
        # Imported here so dask is only needed when this method is called.
        from dask.base import normalize_token

        identity = (type(self), self._value)
        return normalize_token(identity)


@contextlib.contextmanager
def close_on_error(f):
"""Context manager to ensure that a file opened by xarray is closed if an
Expand Down
Loading