diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 995e7676afbca..b5049e734739b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1212,6 +1212,7 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`) +- Bug in ``PyObjectHashTable`` that would silently suppress exceptions thrown from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) - Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. 
(:issue:`10239`) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index eae393f33bfd3..05a697fcc0c2c 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -73,7 +73,7 @@ cimported_types = ['complex64', 'int16', 'int32', 'int64', - 'pymap', + 'pymap_checked', 'str', 'strbox', 'uint8', @@ -1312,13 +1312,13 @@ cdef class StringHashTable(HashTable): cdef class PyObjectHashTable(HashTable): def __init__(self, int64_t size_hint=1): - self.table = kh_init_pymap() + self.table = kh_init_pymap_checked() size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT) - kh_resize_pymap(self.table, size_hint) + kh_resize_pymap_checked(self.table, size_hint) def __dealloc__(self): if self.table is not NULL: - kh_destroy_pymap(self.table) + kh_destroy_pymap_checked(self.table) self.table = NULL def __len__(self) -> int: @@ -1329,7 +1329,7 @@ cdef class PyObjectHashTable(HashTable): khiter_t k hash(key) - k = kh_get_pymap(self.table, key) + k = kh_get_pymap_checked(self.table, key) return k != self.table.n_buckets def sizeof(self, deep: bool = False) -> int: @@ -1356,7 +1356,7 @@ cdef class PyObjectHashTable(HashTable): cdef: khiter_t k - k = kh_get_pymap(self.table, val) + k = kh_get_pymap_checked(self.table, val) if k != self.table.n_buckets: return self.table.vals[k] else: @@ -1370,8 +1370,8 @@ cdef class PyObjectHashTable(HashTable): hash(key) - k = kh_put_pymap(self.table, key, &ret) - if kh_exist_pymap(self.table, k): + k = kh_put_pymap_checked(self.table, key, &ret) + if kh_exist_pymap_checked(self.table, k): self.table.vals[k] = val else: raise KeyError(key) @@ -1388,7 +1388,7 @@ cdef class PyObjectHashTable(HashTable): val = values[i] hash(val) - k = kh_put_pymap(self.table, val, &ret) + k = kh_put_pymap_checked(self.table, val, &ret) self.table.vals[k] = i def lookup(self, ndarray[object] values, object mask = None) -> ndarray: @@ -1405,7 +1405,7 @@ cdef class 
PyObjectHashTable(HashTable): val = values[i] hash(val) - k = kh_get_pymap(self.table, val) + k = kh_get_pymap_checked(self.table, val) if k != self.table.n_buckets: locs[i] = self.table.vals[k] else: @@ -1483,10 +1483,10 @@ cdef class PyObjectHashTable(HashTable): labels[i] = na_sentinel continue - k = kh_get_pymap(self.table, val) + k = kh_get_pymap_checked(self.table, val) if k == self.table.n_buckets: # k hasn't been seen yet - k = kh_put_pymap(self.table, val, &ret) + k = kh_put_pymap_checked(self.table, val, &ret) uniques.append(val) if return_inverse: self.table.vals[k] = count diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 3487f5ebd050d..0722fb65218b8 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -6,26 +6,24 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in {{py: -# name, dtype, ttype, c_type, to_c_type -dtypes = [('Complex128', 'complex128', 'complex128', - 'khcomplex128_t', 'to_khcomplex128_t'), - ('Complex64', 'complex64', 'complex64', - 'khcomplex64_t', 'to_khcomplex64_t'), - ('Float64', 'float64', 'float64', 'float64_t', ''), - ('Float32', 'float32', 'float32', 'float32_t', ''), - ('UInt64', 'uint64', 'uint64', 'uint64_t', ''), - ('UInt32', 'uint32', 'uint32', 'uint32_t', ''), - ('UInt16', 'uint16', 'uint16', 'uint16_t', ''), - ('UInt8', 'uint8', 'uint8', 'uint8_t', ''), - ('Object', 'object', 'pymap', 'object', ''), - ('Int64', 'int64', 'int64', 'int64_t', ''), - ('Int32', 'int32', 'int32', 'int32_t', ''), - ('Int16', 'int16', 'int16', 'int16_t', ''), - ('Int8', 'int8', 'int8', 'int8_t', '')] +# name, dtype, ttype, tfunc, c_type, to_c_type +dtypes = [('Complex128', 'complex128', 'complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'), + ('Complex64', 'complex64', 'complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'), + ('Float64', 'float64', 'float64', 'float64', 'float64_t', ''), + 
('Float32', 'float32', 'float32', 'float32', 'float32_t', ''), + ('UInt64', 'uint64', 'uint64', 'uint64', 'uint64_t', ''), + ('UInt32', 'uint32', 'uint32', 'uint32', 'uint32_t', ''), + ('UInt16', 'uint16', 'uint16', 'uint16', 'uint16_t', ''), + ('UInt8', 'uint8', 'uint8', 'uint8', 'uint8_t', ''), + ('Object', 'object', 'pymap', 'pymap_checked', 'object', ''), + ('Int64', 'int64', 'int64', 'int64', 'int64_t', ''), + ('Int32', 'int32', 'int32', 'int32', 'int32_t', ''), + ('Int16', 'int16', 'int16', 'int16', 'int16_t', ''), + ('Int8', 'int8', 'int8', 'int8', 'int8_t', '')] }} -{{for name, dtype, ttype, c_type, to_c_type in dtypes}} +{{for name, dtype, ttype, tfunc, c_type, to_c_type in dtypes}} @cython.wraparound(False) @@ -55,26 +53,26 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8 # result_keys remembers the original order of keys result_keys = {{name}}Vector() - table = kh_init_{{ttype}}() + table = kh_init_{{tfunc}}() {{if dtype == 'object'}} if uses_mask: raise NotImplementedError("uses_mask not implemented with object dtype") - kh_resize_{{ttype}}(table, n // 10) + kh_resize_{{tfunc}}(table, n // 10) for i in range(n): val = values[i] if not dropna or not checknull(val): - k = kh_get_{{ttype}}(table, {{to_c_type}}val) + k = kh_get_{{tfunc}}(table, {{to_c_type}}val) if k != table.n_buckets: table.vals[k] += 1 else: - k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret) + k = kh_put_{{tfunc}}(table, {{to_c_type}}val, &ret) table.vals[k] = 1 result_keys.append(val) {{else}} - kh_resize_{{ttype}}(table, n) + kh_resize_{{tfunc}}(table, n) for i in range(n): val = {{to_c_type}}(values[i]) @@ -90,11 +88,11 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8 if uses_mask and isna_entry: na_counter += 1 else: - k = kh_get_{{ttype}}(table, val) + k = kh_get_{{tfunc}}(table, val) if k != table.n_buckets: table.vals[k] += 1 else: - k = kh_put_{{ttype}}(table, val, &ret) + k = kh_put_{{tfunc}}(table, val, 
&ret) table.vals[k] = 1 result_keys.append(val) {{endif}} @@ -107,9 +105,9 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8 for i in range(table.size): {{if dtype == 'object'}} - k = kh_get_{{ttype}}(table, result_keys.data[i]) + k = kh_get_{{tfunc}}(table, result_keys.data[i]) {{else}} - k = kh_get_{{ttype}}(table, result_keys.data.data[i]) + k = kh_get_{{tfunc}}(table, result_keys.data.data[i]) {{endif}} result_counts[i] = table.vals[k] @@ -117,7 +115,7 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8 result_counts[table.size] = na_counter result_keys.append(val) - kh_destroy_{{ttype}}(table) + kh_destroy_{{tfunc}}(table) return result_keys.to_array(), result_counts.base, na_counter @@ -138,12 +136,12 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons {{endif}} Py_ssize_t i, n = len(values), first_na = -1 khiter_t k - kh_{{ttype}}_t *table = kh_init_{{ttype}}() + kh_{{ttype}}_t *table = kh_init_{{tfunc}}() ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool') bint seen_na = False, uses_mask = mask is not None bint seen_multiple_na = False - kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) + kh_resize_{{tfunc}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) if keep not in ('last', 'first', False): raise ValueError('keep must be either "first", "last" or False') @@ -168,7 +166,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons seen_na = True else: value = {{to_c_type}}(values[i]) - kh_put_{{ttype}}(table, value, &ret) + kh_put_{{tfunc}}(table, value, &ret) out[i] = ret == 0 {{endfor}} @@ -193,16 +191,16 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons else: value = {{to_c_type}}(values[i]) - k = kh_get_{{ttype}}(table, value) + k = kh_get_{{tfunc}}(table, value) if k != table.n_buckets: out[table.vals[k]] = 1 out[i] = 1 else: - k = 
kh_put_{{ttype}}(table, value, &ret) + k = kh_put_{{tfunc}}(table, value, &ret) table.vals[k] = i out[i] = 0 - kh_destroy_{{ttype}}(table) + kh_destroy_{{tfunc}}(table) return out @@ -243,11 +241,11 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): {{c_type}} val {{endif}} - kh_{{ttype}}_t *table = kh_init_{{ttype}}() + kh_{{ttype}}_t *table = kh_init_{{tfunc}}() # construct the table n = len(values) - kh_resize_{{ttype}}(table, n) + kh_resize_{{tfunc}}(table, n) {{if dtype == 'object'}} if True: @@ -256,7 +254,7 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): {{endif}} for i in range(n): val = {{to_c_type}}(values[i]) - kh_put_{{ttype}}(table, val, &ret) + kh_put_{{tfunc}}(table, val, &ret) # test membership n = len(arr) @@ -269,10 +267,10 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): {{endif}} for i in range(n): val = {{to_c_type}}(arr[i]) - k = kh_get_{{ttype}}(table, val) + k = kh_get_{{tfunc}}(table, val) result[i] = (k != table.n_buckets) - kh_destroy_{{ttype}}(table) + kh_destroy_{{tfunc}}(table) return result.view(np.bool_) # ---------------------------------------------------------------------- diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index e0bb96d57b9e1..e8abc4415dbe6 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -3,7 +3,6 @@ #pragma once #include - #include #include @@ -192,7 +191,16 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) { return 1; } +static inline int _is_pandas_NA_type(PyObject *o) { + // TODO compare PyTypeObject* C_NA, not strings! 
+ PyObject *type_name = PyType_GetName(Py_TYPE(o)); + return PyUnicode_CompareWithASCIIString(type_name, "NAType") == 0; +} + static inline int pyobject_cmp(PyObject *a, PyObject *b) { + if (PyErr_Occurred() != NULL) { + return 0; + } if (a == b) { return 1; } @@ -211,11 +219,12 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) { return tupleobject_cmp((PyTupleObject *)a, (PyTupleObject *)b); } // frozenset isn't yet supported + } else if (_is_pandas_NA_type(a) || _is_pandas_NA_type(b)) { + return 0; } int result = PyObject_RichCompareBool(a, b, Py_EQ); if (result < 0) { - PyErr_Clear(); return 0; } return result; @@ -292,6 +301,9 @@ static inline Py_hash_t tupleobject_hash(PyTupleObject *key) { } static inline khuint32_t kh_python_hash_func(PyObject *key) { + if (PyErr_Occurred() != NULL) { + return 0; + } Py_hash_t hash; // For PyObject_Hash holds: // hash(0.0) == 0 == hash(-0.0) @@ -310,12 +322,19 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) { } else if (PyTuple_Check(key)) { // hash tuple subclasses as builtin tuples hash = tupleobject_hash((PyTupleObject *)key); + } else if (PyDict_Check(key) || PyList_Check(key)) { + // Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were + // suppressed. Existing code that relies on this behaviour is for example: + // * _libs.hashtable.value_count_object via DataFrame.describe + // * _libs.hashtable.ismember_object via Series.isin + // Using hash = 0 puts all dict and list objects in the same bucket, + // which is bad for performance but that is how it worked before. 
+ hash = 0; } else { hash = PyObject_Hash(key); } if (hash == -1) { - PyErr_Clear(); return 0; } #if SIZEOF_PY_HASH_T == 4 diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd index c439e1cca772b..f8c30eec113f7 100644 --- a/pandas/_libs/khash.pxd +++ b/pandas/_libs/khash.pxd @@ -125,5 +125,13 @@ cdef extern from "pandas/vendored/klib/khash_python.h": khuint_t kh_needed_n_buckets(khuint_t element_n) nogil +cdef kh_pymap_t* kh_init_pymap_checked() +cdef void kh_destroy_pymap_checked(kh_pymap_t*) +cdef void kh_clear_pymap_checked(kh_pymap_t*) +cdef khuint_t kh_get_pymap_checked(kh_pymap_t*, PyObject*) +cdef void kh_resize_pymap_checked(kh_pymap_t*, khuint_t) +cdef khuint_t kh_put_pymap_checked(kh_pymap_t*, PyObject*, int*) +cdef void kh_del_pymap_checked(kh_pymap_t*, khuint_t) +cdef bint kh_exist_pymap_checked(kh_pymap_t*, khiter_t) include "khash_for_primitive_helper.pxi" diff --git a/pandas/_libs/khash.pyx b/pandas/_libs/khash.pyx new file mode 100644 index 0000000000000..c2b684100131f --- /dev/null +++ b/pandas/_libs/khash.pyx @@ -0,0 +1,78 @@ +from cpython.exc cimport ( + PyErr_Fetch, + PyErr_Occurred, +) +from cpython.object cimport PyObject +from cpython.ref cimport Py_XDECREF + + +cdef inline raise_if_errors(): + cdef: + object exc_type + object exc_value + PyObject *type + PyObject *value + PyObject *traceback + + if PyErr_Occurred(): + PyErr_Fetch(&type, &value, &traceback) + Py_XDECREF(traceback) + if value != NULL: + exc_value = value + if isinstance(exc_value, str): + if type != NULL: + exc_type = type + else: + exc_type = RuntimeError + Py_XDECREF(type) + raise exc_type(exc_value) + else: + Py_XDECREF(type) + raise exc_value + + +cdef kh_pymap_t* kh_init_pymap_checked(): + cdef kh_pymap_t* table = kh_init_pymap() + if PyErr_Occurred(): + kh_destroy_pymap(table) + table = NULL + raise_if_errors() + return table + + +cdef void kh_destroy_pymap_checked(kh_pymap_t* table): + kh_destroy_pymap(table) + raise_if_errors() + + +cdef void 
kh_clear_pymap_checked(kh_pymap_t* table): + kh_clear_pymap(table) + raise_if_errors() + + +cdef khuint_t kh_get_pymap_checked(kh_pymap_t* table, PyObject* key): + cdef khuint_t k = kh_get_pymap(table, key) + raise_if_errors() + return k + + +cdef void kh_resize_pymap_checked(kh_pymap_t* table, khuint_t new_n_buckets): + kh_resize_pymap(table, new_n_buckets) + raise_if_errors() + + +cdef khuint_t kh_put_pymap_checked(kh_pymap_t* table, PyObject* key, int* ret): + cdef khuint_t result = kh_put_pymap(table, key, ret) + raise_if_errors() + return result + + +cdef void kh_del_pymap_checked(kh_pymap_t* table, khuint_t k): + kh_del_pymap(table, k) + raise_if_errors() + + +cdef bint kh_exist_pymap_checked(kh_pymap_t* table, khiter_t k): + cdef bint res = kh_exist_pymap(table, k) + raise_if_errors() + return res diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 33fc65e5034d0..f6d6e74648cf3 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -97,6 +97,7 @@ libs_sources = { 'sources': ['join.pyx', _khash_primitive_helper], 'deps': _khash_primitive_helper_dep, }, + 'khash': {'sources': ['khash.pyx']}, 'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']}, 'missing': {'sources': ['missing.pyx']}, 'pandas_datetime': { diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 828e4415bd295..21736b24eb35e 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -25,6 +25,7 @@ from typing import ( TYPE_CHECKING, Any, + Literal, ) import numpy as np @@ -41,12 +42,16 @@ ExtensionArray, ExtensionDtype, ) +from pandas.core.algorithms import duplicated from pandas.core.indexers import unpack_tuple_and_ellipses if TYPE_CHECKING: from collections.abc import Mapping - from pandas._typing import type_t + from pandas._typing import ( + npt, + type_t, + ) class JSONDtype(ExtensionDtype): @@ -254,6 +259,17 @@ def _pad_or_backfill(self, *, method, limit=None, copy=True): # 
GH#56616 - test EA method without limit_area argument return super()._pad_or_backfill(method=method, limit=limit, copy=copy) + def duplicated( + self, keep: Literal["first", "last", False] = "first" + ) -> npt.NDArray[np.bool_]: + # pd.core.algorithms.duplicated is implemented with a hash table that + # does not support UserDict values. + # However, dict values are always hashed as 0 for backwards compatibility, + # see GH 57052 + mask = self.isna().astype(np.bool_, copy=False) + values = np.array([dict(x) for x in self], dtype="object") + return duplicated(values=values, keep=keep, mask=mask) + def make_data(n: int): # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 2b36c1135d36d..82d31889a6ffe 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1943,3 +1943,27 @@ def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, index def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer): df = DataFrame({"a": [1, 2, None]}, dtype=float_numpy_dtype) self._check_setitem_invalid(df, invalid, indexer) + + +def test_error_raised_from_custom_hash_method(): + # GH 57052 + class testkey: + def __init__(self, value): + self.value = value + + def __hash__(self): + raise RuntimeError(f"exception in {self!r}.__hash__") + + def __eq__(self, other): + return self.value == other.value + + def __repr__(self): + return f"testkey({self.value})" + + df = DataFrame({"i": map(testkey, range(10))}).set_index("i") + for i in range(len(df.index)): + key = testkey(i) + with pytest.raises( + RuntimeError, match=re.escape(f"exception in {key!r}.__hash__") + ): + df.loc[key] diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index 6a95cfc7355d8..42d486d6fd524 100644 --- a/pandas/tests/libs/test_hashtable.py +++ 
b/pandas/tests/libs/test_hashtable.py @@ -1,6 +1,7 @@ from collections import namedtuple from collections.abc import Generator from contextlib import contextmanager +from itertools import product import re import struct import tracemalloc @@ -780,3 +781,124 @@ def test_float_complex_int_are_equal_as_objects(): result = isin(np.array(values, dtype=object), np.asarray(comps)) expected = np.array([False, True, True, True], dtype=np.bool_) tm.assert_numpy_array_equal(result, expected) + + +class testkey: + # GH 57052 + def __init__(self, value, throw_hash=False, throw_eq=False): + self.value = value + self.throw_hash = throw_hash + self.throw_eq = throw_eq + + def __hash__(self): + if self.throw_hash: + raise RuntimeError(f"exception in {self!r}.__hash__") + return hash(self.value) + + def __eq__(self, other): + if self.throw_eq: + raise RuntimeError(f"exception in {self!r}.__eq__") + return self.value == other.value + + def __repr__(self): + return f"testkey({self.value}, {self.throw_hash}, {self.throw_eq})" + + +@pytest.mark.parametrize("throw1, throw2", product([True, False], repeat=2)) +def test_error_raised_from_hash_method_in_set_item(throw1, throw2): + # GH 57052 + table = ht.PyObjectHashTable() + + key1 = testkey(value="hello1", throw_hash=throw1) + key2 = testkey(value="hello2", throw_hash=throw2) + + if throw1: + with pytest.raises( + RuntimeError, match=re.escape(f"exception in {key1!r}.__hash__") + ): + table.set_item(key1, 123) + else: + table.set_item(key1, 123) + assert table.get_item(key1) == 123 + + if throw2: + with pytest.raises( + RuntimeError, match=re.escape(f"exception in {key2!r}.__hash__") + ): + table.set_item(key2, 456) + else: + table.set_item(key2, 456) + assert table.get_item(key2) == 456 + + +@pytest.mark.parametrize("throw1, throw2", product([True, False], repeat=2)) +def test_error_raised_from_hash_method_in_get_item(throw1, throw2): + # GH 57052 + table = ht.PyObjectHashTable() + + key1 = testkey(value="hello1") + key2 = 
testkey(value="hello2") + + table.set_item(key1, 123) + table.set_item(key2, 456) + + key1.throw_hash = throw1 + key2.throw_hash = throw2 + + if throw1: + with pytest.raises( + RuntimeError, match=re.escape(f"exception in {key1!r}.__hash__") + ): + table.get_item(key1) + else: + assert table.get_item(key1) == 123 + + if throw2: + with pytest.raises( + RuntimeError, match=re.escape(f"exception in {key2!r}.__hash__") + ): + table.get_item(key2) + else: + assert table.get_item(key2) == 456 + + +@pytest.mark.parametrize("throw", [True, False]) +def test_error_raised_from_eq_method_in_set_item(throw): + # GH 57052 + table = ht.PyObjectHashTable() + + key1 = testkey(value="hello", throw_eq=throw) + key2 = testkey(value=key1.value) + + if throw: + table.set_item(key1, 123) + with pytest.raises( + RuntimeError, match=re.escape(f"exception in {key1!r}.__eq__") + ): + table.set_item(key2, 456) + else: + table.set_item(key2, 456) + assert table.get_item(key2) == 456 + + +@pytest.mark.parametrize("throw", [True, False]) +def test_error_raised_from_eq_method_in_get_item(throw): + # GH 57052 + table = ht.PyObjectHashTable() + + key1 = testkey(value="hello") + key2 = testkey(value=key1.value) + + table.set_item(key1, 123) + table.set_item(key2, 456) + + if throw: + key1.throw_eq = True + with pytest.raises( + RuntimeError, match=re.escape(f"exception in {key1!r}.__eq__") + ): + table.get_item(key2) + else: + # this looks odd but it is because key1.value == key2.value + assert table.get_item(key1) == 456 + assert table.get_item(key2) == 456