Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,7 @@ Other
^^^^^
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
- Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
- Bug in :class:`PyObjectHashTable` that would silently suppress exceptions raised from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you able to add a test that uses a public API that would be fixed by your changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

- Bug in :func:`eval` where expressions including division ``/`` on :class:`ExtensionArray` failed with a ``TypeError``. (:issue:`58748`)
- Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`)
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
Expand Down
24 changes: 12 additions & 12 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ cimported_types = ['complex64',
'int16',
'int32',
'int64',
'pymap',
'pymap_checked',
'str',
'strbox',
'uint8',
Expand Down Expand Up @@ -1312,13 +1312,13 @@ cdef class StringHashTable(HashTable):
cdef class PyObjectHashTable(HashTable):

def __init__(self, int64_t size_hint=1):
self.table = kh_init_pymap()
self.table = kh_init_pymap_checked()
size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT)
kh_resize_pymap(self.table, size_hint)
kh_resize_pymap_checked(self.table, size_hint)

def __dealloc__(self):
if self.table is not NULL:
kh_destroy_pymap(self.table)
kh_destroy_pymap_checked(self.table)
self.table = NULL

def __len__(self) -> int:
Expand All @@ -1329,7 +1329,7 @@ cdef class PyObjectHashTable(HashTable):
khiter_t k
hash(key)

k = kh_get_pymap(self.table, <PyObject*>key)
k = kh_get_pymap_checked(self.table, <PyObject*>key)
return k != self.table.n_buckets

def sizeof(self, deep: bool = False) -> int:
Expand All @@ -1356,7 +1356,7 @@ cdef class PyObjectHashTable(HashTable):
cdef:
khiter_t k

k = kh_get_pymap(self.table, <PyObject*>val)
k = kh_get_pymap_checked(self.table, <PyObject*>val)
if k != self.table.n_buckets:
return self.table.vals[k]
else:
Expand All @@ -1370,8 +1370,8 @@ cdef class PyObjectHashTable(HashTable):

hash(key)

k = kh_put_pymap(self.table, <PyObject*>key, &ret)
if kh_exist_pymap(self.table, k):
k = kh_put_pymap_checked(self.table, <PyObject*>key, &ret)
if kh_exist_pymap_checked(self.table, k):
self.table.vals[k] = val
else:
raise KeyError(key)
Expand All @@ -1388,7 +1388,7 @@ cdef class PyObjectHashTable(HashTable):
val = values[i]
hash(val)

k = kh_put_pymap(self.table, <PyObject*>val, &ret)
k = kh_put_pymap_checked(self.table, <PyObject*>val, &ret)
self.table.vals[k] = i

def lookup(self, ndarray[object] values, object mask = None) -> ndarray:
Expand All @@ -1405,7 +1405,7 @@ cdef class PyObjectHashTable(HashTable):
val = values[i]
hash(val)

k = kh_get_pymap(self.table, <PyObject*>val)
k = kh_get_pymap_checked(self.table, <PyObject*>val)
if k != self.table.n_buckets:
locs[i] = self.table.vals[k]
else:
Expand Down Expand Up @@ -1483,10 +1483,10 @@ cdef class PyObjectHashTable(HashTable):
labels[i] = na_sentinel
continue

k = kh_get_pymap(self.table, <PyObject*>val)
k = kh_get_pymap_checked(self.table, <PyObject*>val)
if k == self.table.n_buckets:
# k hasn't been seen yet
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
k = kh_put_pymap_checked(self.table, <PyObject*>val, &ret)
uniques.append(val)
if return_inverse:
self.table.vals[k] = count
Expand Down
74 changes: 36 additions & 38 deletions pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,24 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in

{{py:

# name, dtype, ttype, c_type, to_c_type
dtypes = [('Complex128', 'complex128', 'complex128',
'khcomplex128_t', 'to_khcomplex128_t'),
('Complex64', 'complex64', 'complex64',
'khcomplex64_t', 'to_khcomplex64_t'),
('Float64', 'float64', 'float64', 'float64_t', ''),
('Float32', 'float32', 'float32', 'float32_t', ''),
('UInt64', 'uint64', 'uint64', 'uint64_t', ''),
('UInt32', 'uint32', 'uint32', 'uint32_t', ''),
('UInt16', 'uint16', 'uint16', 'uint16_t', ''),
('UInt8', 'uint8', 'uint8', 'uint8_t', ''),
('Object', 'object', 'pymap', 'object', '<PyObject*>'),
('Int64', 'int64', 'int64', 'int64_t', ''),
('Int32', 'int32', 'int32', 'int32_t', ''),
('Int16', 'int16', 'int16', 'int16_t', ''),
('Int8', 'int8', 'int8', 'int8_t', '')]
# name, dtype, ttype, tfunc, c_type, to_c_type
dtypes = [('Complex128', 'complex128', 'complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'),
('Complex64', 'complex64', 'complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'),
('Float64', 'float64', 'float64', 'float64', 'float64_t', ''),
('Float32', 'float32', 'float32', 'float32', 'float32_t', ''),
('UInt64', 'uint64', 'uint64', 'uint64', 'uint64_t', ''),
('UInt32', 'uint32', 'uint32', 'uint32', 'uint32_t', ''),
('UInt16', 'uint16', 'uint16', 'uint16', 'uint16_t', ''),
('UInt8', 'uint8', 'uint8', 'uint8', 'uint8_t', ''),
('Object', 'object', 'pymap', 'pymap_checked', 'object', '<PyObject*>'),
('Int64', 'int64', 'int64', 'int64', 'int64_t', ''),
('Int32', 'int32', 'int32', 'int32', 'int32_t', ''),
('Int16', 'int16', 'int16', 'int16', 'int16_t', ''),
('Int8', 'int8', 'int8', 'int8', 'int8_t', '')]

}}

{{for name, dtype, ttype, c_type, to_c_type in dtypes}}
{{for name, dtype, ttype, tfunc, c_type, to_c_type in dtypes}}


@cython.wraparound(False)
Expand Down Expand Up @@ -55,26 +53,26 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
# result_keys remembers the original order of keys

result_keys = {{name}}Vector()
table = kh_init_{{ttype}}()
table = kh_init_{{tfunc}}()

{{if dtype == 'object'}}
if uses_mask:
raise NotImplementedError("uses_mask not implemented with object dtype")

kh_resize_{{ttype}}(table, n // 10)
kh_resize_{{tfunc}}(table, n // 10)

for i in range(n):
val = values[i]
if not dropna or not checknull(val):
k = kh_get_{{ttype}}(table, {{to_c_type}}val)
k = kh_get_{{tfunc}}(table, {{to_c_type}}val)
if k != table.n_buckets:
table.vals[k] += 1
else:
k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret)
k = kh_put_{{tfunc}}(table, {{to_c_type}}val, &ret)
table.vals[k] = 1
result_keys.append(val)
{{else}}
kh_resize_{{ttype}}(table, n)
kh_resize_{{tfunc}}(table, n)

for i in range(n):
val = {{to_c_type}}(values[i])
Expand All @@ -90,11 +88,11 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
if uses_mask and isna_entry:
na_counter += 1
else:
k = kh_get_{{ttype}}(table, val)
k = kh_get_{{tfunc}}(table, val)
if k != table.n_buckets:
table.vals[k] += 1
else:
k = kh_put_{{ttype}}(table, val, &ret)
k = kh_put_{{tfunc}}(table, val, &ret)
table.vals[k] = 1
result_keys.append(val)
{{endif}}
Expand All @@ -107,17 +105,17 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8

for i in range(table.size):
{{if dtype == 'object'}}
k = kh_get_{{ttype}}(table, result_keys.data[i])
k = kh_get_{{tfunc}}(table, result_keys.data[i])
{{else}}
k = kh_get_{{ttype}}(table, result_keys.data.data[i])
k = kh_get_{{tfunc}}(table, result_keys.data.data[i])
{{endif}}
result_counts[i] = table.vals[k]

if na_counter > 0:
result_counts[table.size] = na_counter
result_keys.append(val)

kh_destroy_{{ttype}}(table)
kh_destroy_{{tfunc}}(table)

return result_keys.to_array(), result_counts.base, na_counter

Expand All @@ -138,12 +136,12 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
{{endif}}
Py_ssize_t i, n = len(values), first_na = -1
khiter_t k
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
kh_{{ttype}}_t *table = kh_init_{{tfunc}}()
ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')
bint seen_na = False, uses_mask = mask is not None
bint seen_multiple_na = False

kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))
kh_resize_{{tfunc}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))

if keep not in ('last', 'first', False):
raise ValueError('keep must be either "first", "last" or False')
Expand All @@ -168,7 +166,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
seen_na = True
else:
value = {{to_c_type}}(values[i])
kh_put_{{ttype}}(table, value, &ret)
kh_put_{{tfunc}}(table, value, &ret)
out[i] = ret == 0
{{endfor}}

Expand All @@ -193,16 +191,16 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons

else:
value = {{to_c_type}}(values[i])
k = kh_get_{{ttype}}(table, value)
k = kh_get_{{tfunc}}(table, value)
if k != table.n_buckets:
out[table.vals[k]] = 1
out[i] = 1
else:
k = kh_put_{{ttype}}(table, value, &ret)
k = kh_put_{{tfunc}}(table, value, &ret)
table.vals[k] = i
out[i] = 0

kh_destroy_{{ttype}}(table)
kh_destroy_{{tfunc}}(table)
return out


Expand Down Expand Up @@ -243,11 +241,11 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
{{c_type}} val
{{endif}}

kh_{{ttype}}_t *table = kh_init_{{ttype}}()
kh_{{ttype}}_t *table = kh_init_{{tfunc}}()

# construct the table
n = len(values)
kh_resize_{{ttype}}(table, n)
kh_resize_{{tfunc}}(table, n)

{{if dtype == 'object'}}
if True:
Expand All @@ -256,7 +254,7 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
{{endif}}
for i in range(n):
val = {{to_c_type}}(values[i])
kh_put_{{ttype}}(table, val, &ret)
kh_put_{{tfunc}}(table, val, &ret)

# test membership
n = len(arr)
Expand All @@ -269,10 +267,10 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
{{endif}}
for i in range(n):
val = {{to_c_type}}(arr[i])
k = kh_get_{{ttype}}(table, val)
k = kh_get_{{tfunc}}(table, val)
result[i] = (k != table.n_buckets)

kh_destroy_{{ttype}}(table)
kh_destroy_{{tfunc}}(table)
return result.view(np.bool_)

# ----------------------------------------------------------------------
Expand Down
25 changes: 22 additions & 3 deletions pandas/_libs/include/pandas/vendored/klib/khash_python.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#pragma once

#include <Python.h>

#include <pymem.h>
#include <string.h>

Expand Down Expand Up @@ -192,7 +191,16 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) {
return 1;
}

// Return 1 if *o* is an instance of pandas' NAType (the pd.NA singleton's
// type), 0 otherwise. Used by pyobject_cmp so NA never compares equal.
// TODO compare PyTypeObject* C_NA, not strings! The name check can match any
// unrelated type that happens to be called "NAType".
// NOTE(review): PyType_GetName requires CPython >= 3.11 -- confirm this
// matches the project's minimum supported Python version.
static inline int _is_pandas_NA_type(PyObject *o) {
  // PyType_GetName returns a NEW reference (or NULL on error); it must be
  // released, otherwise every hash-table probe on an NA leaks the name.
  PyObject *type_name = PyType_GetName(Py_TYPE(o));
  if (type_name == NULL) {
    PyErr_Clear();
    return 0;
  }
  const int is_na =
      PyUnicode_CompareWithASCIIString(type_name, "NAType") == 0;
  Py_DECREF(type_name);
  return is_na;
}

static inline int pyobject_cmp(PyObject *a, PyObject *b) {
if (PyErr_Occurred() != NULL) {
return 0;
}
if (a == b) {
return 1;
}
Expand All @@ -211,11 +219,12 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
return tupleobject_cmp((PyTupleObject *)a, (PyTupleObject *)b);
}
// frozenset isn't yet supported
} else if (_is_pandas_NA_type(a) || _is_pandas_NA_type(b)) {
return 0;
}

int result = PyObject_RichCompareBool(a, b, Py_EQ);
if (result < 0) {
PyErr_Clear();
return 0;
}
return result;
Expand Down Expand Up @@ -292,6 +301,9 @@ static inline Py_hash_t tupleobject_hash(PyTupleObject *key) {
}

static inline khuint32_t kh_python_hash_func(PyObject *key) {
if (PyErr_Occurred() != NULL) {
return 0;
}
Py_hash_t hash;
// For PyObject_Hash holds:
// hash(0.0) == 0 == hash(-0.0)
Expand All @@ -310,12 +322,19 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
} else if (PyTuple_Check(key)) {
// hash tuple subclasses as builtin tuples
hash = tupleobject_hash((PyTupleObject *)key);
} else if (PyDict_Check(key) || PyList_Check(key)) {
// Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were
// suppressed. Existing code that relies on this behaviour is for example:
// * _libs.hashtable.value_count_object via DataFrame.describe
// * _libs.hashtable.ismember_object via Series.isin
// Using hash = 0 puts all dict and list objects in the same bucket,
// which is bad for performance but that is how it worked before.
hash = 0;
} else {
hash = PyObject_Hash(key);
}

if (hash == -1) {
PyErr_Clear();
return 0;
}
#if SIZEOF_PY_HASH_T == 4
Expand Down
8 changes: 8 additions & 0 deletions pandas/_libs/khash.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,13 @@ cdef extern from "pandas/vendored/klib/khash_python.h":

khuint_t kh_needed_n_buckets(khuint_t element_n) nogil

# "_checked" variants of the pymap hash-table API. They operate on the same
# kh_pymap_t as the plain kh_*_pymap functions, but their hash/equality hooks
# check PyErr_Occurred() so exceptions raised by custom __hash__/__eq__
# propagate instead of being silently suppressed (GH 57052).
cdef kh_pymap_t* kh_init_pymap_checked()
cdef void kh_destroy_pymap_checked(kh_pymap_t*)
cdef void kh_clear_pymap_checked(kh_pymap_t*)
cdef khuint_t kh_get_pymap_checked(kh_pymap_t*, PyObject*)
cdef void kh_resize_pymap_checked(kh_pymap_t*, khuint_t)
cdef khuint_t kh_put_pymap_checked(kh_pymap_t*, PyObject*, int*)
cdef void kh_del_pymap_checked(kh_pymap_t*, khuint_t)
cdef bint kh_exist_pymap_checked(kh_pymap_t*, khiter_t)

include "khash_for_primitive_helper.pxi"
Loading
Loading