Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
is_integer_dtype,
is_list_like,
is_object_dtype,
is_signed_integer_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
Expand Down Expand Up @@ -518,16 +517,10 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
orig_values = list(values)
values = _ensure_arraylike(orig_values, func_name="isin-targets")

if (
len(values) > 0
and values.dtype.kind in "iufcb"
and not is_signed_integer_dtype(comps)
and not is_dtype_equal(values, comps)
):
# GH#46485 Use object to avoid upcast to float64 later
# TODO: Share with _find_common_type_compat
values = construct_1d_object_array_from_listlike(orig_values)
# Keep values as a numeric ndarray where possible; we handle
# signed/unsigned integer mixes with a fast-path later (after
# comps_array extraction) to avoid object-dtype conversions that
# harm performance for large numeric arrays.

elif isinstance(values, ABCMultiIndex):
# Avoid raising in extract_array
Expand Down Expand Up @@ -579,6 +572,34 @@ def f(c, v):
f = lambda a, b: np.isin(a, b).ravel()

else:
# Fast-path: handle integer-kind mixes without upcasting to float64.
if (
values.dtype.kind in "iu"
and comps_array.dtype.kind in "iu"
# Only apply fast-path for 64-bit integer widths to avoid
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you expand on this comment? I don't fully understand what the surprising behavior is, and I'm worried it's a red herring.

# surprising behaviour on platforms or dtypes with different
# itemsize. Narrowing to 8-byte ints
# keeps the fast-path safe and performant for the common case.
and values.dtype.itemsize == 8
and comps_array.dtype.itemsize == 8
and not is_dtype_equal(values.dtype, comps_array.dtype)
):
try:
if values.size > 0 and comps_array.size > 0:
signed_negative = False
if values.dtype.kind == "i":
signed_negative = values.min() < 0
if comps_array.dtype.kind == "i":
signed_negative = signed_negative or (comps_array.min() < 0)

if not signed_negative:
values_u = values.astype("uint64", copy=False)
comps_u = comps_array.astype("uint64", copy=False)
return htable.ismember(comps_u, values_u)
except Exception:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We almost never catch a bare `Exception` in the codebase. What is this trying to catch here?

# fall back to generic path on error
pass

common = np_find_common_type(values.dtype, comps_array.dtype)
values = values.astype(common, copy=False)
comps_array = comps_array.astype(common, copy=False)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/series/methods/test_isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,12 @@ def test_isin_filtering_on_iterable(data, isin):
expected_result = Series([True, True, False])

tm.assert_series_equal(result, expected_result)


def test_isin_int64_vs_uint64_mismatch():
    # The int64 element and the uint64 target are distinct integers that
    # differ by only 86, so both round to the same float64 value; a lookup
    # that compared them as floats would report a spurious match.
    haystack = Series([1378774140726870442], dtype=np.int64)
    needles = [np.uint64(1378774140726870528)]

    # No element of the series equals the target, so the mask is all False.
    tm.assert_series_equal(haystack.isin(needles), Series([False]))
Loading