Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,8 @@ cdef inline bint is_null_period(v):
def _create_binary_propagating_op(name, divmod=False):

def method(self, other):
if other is C_NA or isinstance(other, str) or isinstance(other, numbers.Number):
if (other is C_NA or isinstance(other, str)
or isinstance(other, (numbers.Number, np.bool_))):
if divmod:
return NA, NA
else:
Expand Down
79 changes: 49 additions & 30 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import numbers
from typing import TYPE_CHECKING, Type
from typing import TYPE_CHECKING, Any, Tuple, Type
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs import lib, missing as libmissing
from pandas.compat import set_function_name

from pandas.core.dtypes.base import ExtensionDtype
Expand Down Expand Up @@ -61,13 +61,13 @@ class BooleanDtype(ExtensionDtype):
@property
def na_value(self) -> "Scalar":
"""
BooleanDtype uses :attr:`numpy.nan` as the missing NA value.
BooleanDtype uses :attr:`pd.NA` as the missing NA value.

.. warning::

`na_value` may change in a future release.
"""
return np.nan
return libmissing.NA

@property
def type(self) -> Type:
Expand Down Expand Up @@ -223,7 +223,7 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):

>>> pd.array([True, False, None], dtype="boolean")
<BooleanArray>
[True, False, NaN]
[True, False, NA]
Length: 3, dtype: boolean
"""

Expand Down Expand Up @@ -262,17 +262,17 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
values, mask = coerce_to_array(scalars, copy=copy)
return BooleanArray(values, mask)

def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
data = self._data.astype("int8")
data[self._mask] = -1
return data, -1

@classmethod
def _from_factorized(cls, values, original: "BooleanArray"):
return cls._from_sequence(values, dtype=original.dtype)

def _formatter(self, boxed=False):
def fmt(x):
if isna(x):
return "NaN"
return str(x)

return fmt
return str

def __getitem__(self, item):
if is_integer(item):
Expand All @@ -281,7 +281,9 @@ def __getitem__(self, item):
return self._data[item]
return type(self)(self._data[item], self._mask[item])

def _coerce_to_ndarray(self, force_bool: bool = False):
def _coerce_to_ndarray(
self, force_bool: bool = False, na_value: "Scalar" = lib._no_default
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to prefer lib_no_default to just libmissing.NA directly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, not sure. I was probably sidetracked by the idea I could not use None as default as that is a valid value as well ..
(if we want to have this generic / shared with other arrays, using self._na_value might be useful, but I don't think we will share this with arrays that don't use pd.NA, so ..)

):
"""
Coerce to an ndarary of object dtype or bool dtype (if force_bool=True).

Expand All @@ -290,6 +292,9 @@ def _coerce_to_ndarray(self, force_bool: bool = False):
force_bool : bool, default False
If True, return bool array or raise error if not possible (in
presence of missing values)
na_value : scalar, optional
Scalar missing value indicator to use in numpy array. Defaults
to the native missing value indicator of this array (pd.NA).
"""
if force_bool:
if not self.isna().any():
Expand All @@ -298,8 +303,10 @@ def _coerce_to_ndarray(self, force_bool: bool = False):
raise ValueError(
"cannot convert to bool numpy array in presence of missing values"
)
if na_value is lib._no_default:
na_value = self._na_value
data = self._data.astype(object)
data[self._mask] = self._na_value
data[self._mask] = na_value
return data

__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
Expand Down Expand Up @@ -483,8 +490,17 @@ def astype(self, dtype, copy=True):
return IntegerArray(
self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
)
# for integer, error if there are missing values
if is_integer_dtype(dtype):
if self.isna().any():
raise ValueError("cannot convert NA to integer")
# for float dtype, ensure we use np.nan before casting (numpy cannot
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pending the discussion in #30038.

# deal with pd.NA)
na_value = lib._no_default
if is_float_dtype(dtype):
na_value = np.nan
# coerce
data = self._coerce_to_ndarray()
data = self._coerce_to_ndarray(na_value=na_value)
return astype_nansafe(data, dtype, copy=None)

def value_counts(self, dropna=True):
Expand Down Expand Up @@ -594,8 +610,6 @@ def logical_method(self, other):

@classmethod
def _create_comparison_method(cls, op):
op_name = op.__name__

def cmp_method(self, other):

if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
Expand All @@ -617,21 +631,26 @@ def cmp_method(self, other):
if len(self) != len(other):
raise ValueError("Lengths must match to compare")

# numpy will show a DeprecationWarning on invalid elementwise
# comparisons, this will raise in the future
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "elementwise", FutureWarning)
with np.errstate(all="ignore"):
result = op(self._data, other)

# nans propagate
if mask is None:
mask = self._mask
if other is libmissing.NA:
# numpy does not handle pd.NA well as "other" scalar (it returns
# a scalar False instead of an array)
result = np.zeros_like(self._data)
mask = np.ones_like(self._data)
else:
mask = self._mask | mask
# numpy will show a DeprecationWarning on invalid elementwise
# comparisons, this will raise in the future
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "elementwise", FutureWarning)
with np.errstate(all="ignore"):
result = op(self._data, other)

# nans propagate
if mask is None:
mask = self._mask.copy()
else:
mask = self._mask | mask

result[mask] = op_name == "ne"
return BooleanArray(result, np.zeros(len(result), dtype=bool), copy=False)
return BooleanArray(result, mask, copy=False)

name = "__{name}__".format(name=op.__name__)
return set_function_name(cmp_method, name, cls)
Expand All @@ -643,7 +662,7 @@ def _reduce(self, name, skipna=True, **kwargs):
# coerce to a nan-aware float if needed
if mask.any():
data = self._data.astype("float64")
data[mask] = self._na_value
data[mask] = np.nan

op = getattr(nanops, "nan" + name)
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
Expand Down
58 changes: 48 additions & 10 deletions pandas/tests/arrays/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_coerce_to_numpy_array():
# with missing values -> object dtype
arr = pd.array([True, False, None], dtype="boolean")
result = np.array(arr)
expected = np.array([True, False, None], dtype="object")
expected = np.array([True, False, pd.NA], dtype="object")
tm.assert_numpy_array_equal(result, expected)

# also with no missing values -> object dtype
Expand All @@ -238,12 +238,11 @@ def test_coerce_to_numpy_array():
def test_astype():
# with missing values
arr = pd.array([True, False, None], dtype="boolean")
msg = "cannot convert float NaN to"

with pytest.raises(ValueError, match=msg):
with pytest.raises(ValueError, match="cannot convert NA to integer"):
arr.astype("int64")

with pytest.raises(ValueError, match=msg):
with pytest.raises(ValueError, match="cannot convert float NaN to"):
arr.astype("bool")

result = arr.astype("float64")
Expand Down Expand Up @@ -406,9 +405,8 @@ def _compare_other(self, data, op_name, other):
# array
result = pd.Series(op(data, other))
expected = pd.Series(op(data._data, other), dtype="boolean")

# fill the nan locations
expected[data._mask] = op_name == "__ne__"
# propagate NAs
expected[data._mask] = pd.NA

tm.assert_series_equal(result, expected)

Expand All @@ -419,9 +417,8 @@ def _compare_other(self, data, op_name, other):
expected = pd.Series(data._data)
expected = op(expected, other)
expected = expected.astype("boolean")

# fill the nan locations
expected[data._mask] = op_name == "__ne__"
# propagate NAs
expected[data._mask] = pd.NA

tm.assert_series_equal(result, expected)

Expand All @@ -438,6 +435,47 @@ def test_compare_array(self, data, all_compare_operators):
other = pd.Series([True] * len(data))
self._compare_other(data, op_name, other)

@pytest.mark.parametrize("other", [True, False, pd.NA])
def test_scalar(self, other, all_compare_operators):
op = self.get_op_from_name(all_compare_operators)
a = pd.array([True, False, None], dtype="boolean")

result = op(a, other)

if other is pd.NA:
expected = pd.array([None, None, None], dtype="boolean")
else:
values = op(a._data, other)
expected = BooleanArray(values, a._mask, copy=True)
tm.assert_extension_array_equal(result, expected)

# ensure we haven't mutated anything inplace
result[0] = None
tm.assert_extension_array_equal(
a, pd.array([True, False, None], dtype="boolean")
)

def test_array(self, all_compare_operators):
op = self.get_op_from_name(all_compare_operators)
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
b = pd.array([True, False, None] * 3, dtype="boolean")

result = op(a, b)

values = op(a._data, b._data)
mask = a._mask | b._mask
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)

# ensure we haven't mutated anything inplace
result[0] = None
tm.assert_extension_array_equal(
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
)
tm.assert_extension_array_equal(
b, pd.array([True, False, None] * 3, dtype="boolean")
)


class TestArithmeticOps(BaseOpsUtil):
def test_error(self, data, all_arithmetic_operators):
Expand Down
14 changes: 11 additions & 3 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ def data_missing_for_sorting(dtype):

@pytest.fixture
def na_cmp():
# we are np.nan
return lambda x, y: np.isnan(x) and np.isnan(y)
# we are pd.NA
return lambda x, y: x is pd.NA and y is pd.NA


@pytest.fixture
def na_value():
return np.nan
return pd.NA


@pytest.fixture
Expand Down Expand Up @@ -160,6 +160,14 @@ def check_opname(self, s, op_name, other, exc=None):
def _compare_other(self, s, data, op_name, other):
self.check_opname(s, op_name, other)

@pytest.mark.skip(reason="Tested in tests/arrays/test_boolean.py")
def test_compare_scalar(self, data, all_compare_operators):
pass

@pytest.mark.skip(reason="Tested in tests/arrays/test_boolean.py")
def test_compare_array(self, data, all_compare_operators):
pass


class TestReshaping(base.BaseReshapingTests):
pass
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/scalar/test_na_scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ def test_arithmetic_ops(all_arithmetic_functions):

def test_comparison_ops():

for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]:
for other in [NA, 1, 1.0, "a", np.int64(1), np.nan, np.bool_(True)]:
assert (NA == other) is NA
assert (NA != other) is NA
assert (NA > other) is NA
assert (NA >= other) is NA
assert (NA < other) is NA
assert (NA <= other) is NA

if isinstance(other, np.int64):
if isinstance(other, (np.int64, np.bool_)):
# for numpy scalars we get a deprecation warning and False as result
# for equality or error for larger/lesser than
continue
Expand Down