Skip to content

Commit ecf23e1

Browse files
committed
performance improved approach
1 parent db31f6a commit ecf23e1

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

pandas/core/algorithms.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,35 @@ def f(c, v):
579579
f = lambda a, b: np.isin(a, b).ravel()
580580

581581
else:
582+
# Fast-path for integer mixes: if both sides are integer-kind and
583+
# have different dtypes, avoid upcasting to float64 (which loses
584+
# precision for large 64-bit integers). When possible, perform the
585+
# comparison in unsigned 64-bit space which preserves exact integer
586+
# equality and uses the integer hashtable for performance.
587+
if (
588+
values.dtype.kind in "iu"
589+
and comps_array.dtype.kind in "iu"
590+
and not is_dtype_equal(values.dtype, comps_array.dtype)
591+
):
592+
try:
593+
# only proceed when both arrays are non-empty
594+
if values.size > 0 and comps_array.size > 0:
595+
signed_negative = False
596+
if values.dtype.kind == "i":
597+
# using min is vectorized and fast
598+
signed_negative = values.min() < 0
599+
if comps_array.dtype.kind == "i":
600+
signed_negative = signed_negative or (comps_array.min() < 0)
601+
602+
if not signed_negative:
603+
# safe to cast both to uint64 for exact comparison
604+
values_u = values.astype("uint64", copy=False)
605+
comps_u = comps_array.astype("uint64", copy=False)
606+
return htable.ismember(comps_u, values_u)
607+
except Exception:
608+
# fall back to generic behavior on any error
609+
pass
610+
582611
common = np_find_common_type(values.dtype, comps_array.dtype)
583612
values = values.astype(common, copy=False)
584613
comps_array = comps_array.astype(common, copy=False)

pandas/tests/series/methods/test_isin.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,13 @@ def test_isin_filtering_on_iterable(data, isin):
267267
expected_result = Series([True, True, False])
268268

269269
tm.assert_series_equal(result, expected_result)
270+
271+
272+
def test_isin_int64_vs_uint64_mismatch():
    """Check that ``Series.isin`` compares int64 data with uint64 values exactly.

    The two integers used here differ by 86 but are both far above 2**53,
    so they round to the same float64. A comparison performed after
    upcasting both sides to float64 would therefore report a spurious match
    for the first element.
    """
    # Regression test for mixing signed int64 Series with uint64 values
    near_miss = 1378774140726870442
    target = 1378774140726870528
    ser = Series([near_miss, target], dtype=np.int64)
    vals = [np.uint64(target)]

    res = ser.isin(vals)

    # The second element is a positive control: it guards against an
    # implementation that "passes" by never matching mixed-sign integers.
    expected = Series([False, True])
    tm.assert_series_equal(res, expected)

0 commit comments

Comments
 (0)