From f7df8af478861baff99083fcce1c53f405956657 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Sat, 25 Oct 2025 23:05:51 -0400
Subject: [PATCH 01/16] try triggering exceptions before entering khash

---
 pandas/_libs/hashtable_class_helper.pxi.in | 10 +++
 pandas/tests/libs/test_hashtable.py        | 78 ++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index eae393f33bfd3..7182f078720f0 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -1356,6 +1356,14 @@ cdef class PyObjectHashTable(HashTable):
         cdef:
             khiter_t k
 
+        # GH 57052
+        # in khash_python.h, kh_python_hash_equal and kh_python_hash_func will be called repeatedly by khash in a loop.
+        # if object implements custom __hash__ and __eq__ methods that can raise exceptions,
+        # kh_python_hash_{equal,func} will suppress the exceptions without warnings.
+        # as a workaround: try triggering exceptions here, before starting the khash loop
+        hash(val)
+        val == val
+
         k = kh_get_pymap(self.table, <PyObject*>val)
         if k != self.table.n_buckets:
             return self.table.vals[k]
@@ -1369,6 +1377,8 @@ cdef class PyObjectHashTable(HashTable):
             char* buf
 
         hash(key)
+        # GH 57052
+        key == key
 
         k = kh_put_pymap(self.table, <PyObject*>key, &ret)
         if kh_exist_pymap(self.table, k):
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 6a95cfc7355d8..5f8eb8c540951 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -1,6 +1,7 @@
 from collections import namedtuple
 from collections.abc import Generator
 from contextlib import contextmanager
+from itertools import product
 import re
 import struct
 import tracemalloc
@@ -780,3 +781,80 @@ def test_float_complex_int_are_equal_as_objects():
     result = isin(np.array(values, dtype=object), np.asarray(comps))
     expected = np.array([False, True, True, True], dtype=np.bool_)
     tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "throw1hash, throw2hash, throw1eq, throw2eq",
+    product([True, False], repeat=4),
+)
+def test_exceptions_thrown_from_custom_hash_and_eq_methods(
+    throw1hash, throw2hash, throw1eq, throw2eq
+):
+    # GH 57052
+    class testkey:
+        def __init__(self, value, throw_hash=False, throw_eq=False):
+            self.value = value
+            self.throw_hash = throw_hash
+            self.throw_eq = throw_eq
+
+        def __hash__(self):
+            if self.throw_hash:
+                raise RuntimeError(f"exception in {self!r}.__hash__")
+            return hash(self.value)
+
+        def __eq__(self, other):
+            if self.throw_eq:
+                raise RuntimeError(f"exception in {self!r}.__eq__")
+            return self.value == other.value
+
+        def __repr__(self):
+            return f"{self.__class__.__name__}({self.value}, {self.throw_hash}, {self.throw_eq})"
+
+    table = ht.PyObjectHashTable()
+
+    key1 = testkey(value="hello1")
+    key2 = testkey(value="hello2")
+
+    table.set_item(key1, 123)
+    table.set_item(key2, 456)
+
+    key1.throw_hash = throw1hash
+    key2.throw_hash = throw2hash
+    key1.throw_eq = throw1eq
+    key2.throw_eq = throw2eq
+
+    if throw1hash and throw1eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key1!r}.") + "__(hash|eq)__"
+        ):
+            table.get_item(key1)
+    elif throw1hash:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key1!r}.__hash__")
+        ):
+            table.get_item(key1)
+    elif throw1eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key1!r}.__eq__")
+        ):
+            table.get_item(key1)
+    else:
+        assert table.get_item(key1) == 123
+
+    if throw2hash and throw2eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key2!r}.") + "__(hash|eq)__"
+        ):
+            table.get_item(key2)
+    elif throw2hash:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key2!r}.__hash__")
+        ):
+            table.get_item(key2)
+    elif throw2eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key2!r}.__eq__")
+        ):
+            table.get_item(key2)
+    else:
+        assert table.get_item(key2) == 456

From cf1e29e531a6d3a194a483e77e3a27b7d988c257 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Mon, 27 Oct 2025 22:18:09 -0400
Subject: [PATCH 02/16] update whatsnew

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 995e7676afbca..19e159abf0e26 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1210,6 +1210,7 @@ Styler
 
 Other
 ^^^^^
+- Bug :class:``PyObjectHashTable`` that would silently suppress exceptions thrown from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`)
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
 - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)

From 76f94fa5d899555dd92868cd055666a021216a16 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Mon, 27 Oct 2025 22:19:25 -0400
Subject: [PATCH 03/16] fix precommit

---
 pandas/tests/libs/test_hashtable.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 5f8eb8c540951..efdf0ae3a18fb 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -808,7 +808,7 @@ def __eq__(self, other):
             return self.value == other.value
 
         def __repr__(self):
-            return f"{self.__class__.__name__}({self.value}, {self.throw_hash}, {self.throw_eq})"
+            return f"testkey({self.value}, {self.throw_hash}, {self.throw_eq})"
 
     table = ht.PyObjectHashTable()
 

From 6729fe823d889707ba4ff4ab750f4760f2f0679d Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Mon, 27 Oct 2025 22:33:43 -0400
Subject: [PATCH 04/16] fix typo

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 19e159abf0e26..affd79468a37f 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1210,7 +1210,7 @@ Styler
 
 Other
 ^^^^^
-- Bug :class:``PyObjectHashTable`` that would silently suppress exceptions thrown from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`)
+- Bug in :class:``PyObjectHashTable`` that would silently suppress exceptions thrown from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`)
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
 - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)

From be9d047a61da2d2df86ae9b02aafe5f5c33a209d Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Mon, 27 Oct 2025 22:56:42 -0400
Subject: [PATCH 05/16] fix precommit

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index affd79468a37f..b5049e734739b 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1210,9 +1210,9 @@ Styler
 
 Other
 ^^^^^
-- Bug in :class:``PyObjectHashTable`` that would silently suppress exceptions thrown from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`)
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
+- Bug in :class:``PyObjectHashTable`` that would silently suppress exceptions thrown from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`)
 - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)
 - Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)

From 318fe86595e781810dbe32d4df6c655b8ecab599 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Tue, 28 Oct 2025 22:41:06 -0400
Subject: [PATCH 06/16] rewrite hash fix, add tests

---
 pandas/_libs/hashtable_class_helper.pxi.in    |  30 ++--
 .../pandas/vendored/klib/khash_python.h       |   8 +-
 pandas/tests/frame/indexing/test_indexing.py  |  24 ++++
 pandas/tests/libs/test_hashtable.py           | 132 ++++++++++++------
 4 files changed, 138 insertions(+), 56 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 7182f078720f0..fd5b9cc8b7910 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -4,6 +4,8 @@ Template for each `dtype` helper function for hashtable
 WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 """
 from cpython.unicode cimport PyUnicode_AsUTF8
+from cpython.exc cimport PyErr_Occurred, PyErr_Fetch
+from cpython.ref cimport Py_XDECREF
 
 {{py:
 
@@ -1309,6 +1311,22 @@ cdef class StringHashTable(HashTable):
         return labels
 
 
+cdef raise_if_errors():
+    cdef:
+        object exc
+        PyObject *type
+        PyObject *value
+        PyObject *traceback
+
+    PyErr_Fetch(&type, &value, &traceback)
+    if value != NULL:
+        exc = <object>value
+        Py_XDECREF(value)
+        Py_XDECREF(type)
+        Py_XDECREF(traceback)
+        raise exc
+
+
 cdef class PyObjectHashTable(HashTable):
 
     def __init__(self, int64_t size_hint=1):
@@ -1356,15 +1374,8 @@ cdef class PyObjectHashTable(HashTable):
         cdef:
             khiter_t k
 
-        # GH 57052
-        # in khash_python.h, kh_python_hash_equal and kh_python_hash_func will be called repeatedly by khash in a loop.
-        # if object implements custom __hash__ and __eq__ methods that can raise exceptions,
-        # kh_python_hash_{equal,func} will suppress the exceptions without warnings.
-        # as a workaround: try triggering exceptions here, before starting the khash loop
-        hash(val)
-        val == val
-
         k = kh_get_pymap(self.table, <PyObject*>val)
+        raise_if_errors()
         if k != self.table.n_buckets:
             return self.table.vals[k]
         else:
@@ -1377,10 +1388,9 @@ cdef class PyObjectHashTable(HashTable):
             char* buf
 
         hash(key)
-        # GH 57052
-        key == key
 
         k = kh_put_pymap(self.table, <PyObject*>key, &ret)
+        raise_if_errors()
         if kh_exist_pymap(self.table, k):
             self.table.vals[k] = val
         else:
diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index e0bb96d57b9e1..fa1ea1430f917 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -193,6 +193,9 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) {
 }
 
 static inline int pyobject_cmp(PyObject *a, PyObject *b) {
+  if (PyErr_Occurred() != NULL) {
+    return 0;
+  }
   if (a == b) {
     return 1;
   }
@@ -215,7 +218,6 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
 
   int result = PyObject_RichCompareBool(a, b, Py_EQ);
   if (result < 0) {
-    PyErr_Clear();
     return 0;
   }
   return result;
@@ -292,6 +294,9 @@ static inline Py_hash_t tupleobject_hash(PyTupleObject *key) {
 }
 
 static inline khuint32_t kh_python_hash_func(PyObject *key) {
+  if (PyErr_Occurred() != NULL) {
+    return 0;
+  }
   Py_hash_t hash;
   // For PyObject_Hash holds:
   //    hash(0.0) == 0 == hash(-0.0)
@@ -315,7 +320,6 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
   }
 
   if (hash == -1) {
-    PyErr_Clear();
     return 0;
   }
 #if SIZEOF_PY_HASH_T == 4
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 2b36c1135d36d..82d31889a6ffe 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1943,3 +1943,27 @@ def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, index
     def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
         df = DataFrame({"a": [1, 2, None]}, dtype=float_numpy_dtype)
         self._check_setitem_invalid(df, invalid, indexer)
+
+
+def test_error_raised_from_custom_hash_method():
+    # GH 57052
+    class testkey:
+        def __init__(self, value):
+            self.value = value
+
+        def __hash__(self):
+            raise RuntimeError(f"exception in {self!r}.__hash__")
+
+        def __eq__(self, other):
+            return self.value == other.value
+
+        def __repr__(self):
+            return f"testkey({self.value})"
+
+    df = DataFrame({"i": map(testkey, range(10))}).set_index("i")
+    for i in range(len(df.index)):
+        key = testkey(i)
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key!r}.__hash__")
+        ):
+            df.loc[key]
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index efdf0ae3a18fb..42d486d6fd524 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -783,33 +783,57 @@ def test_float_complex_int_are_equal_as_objects():
     tm.assert_numpy_array_equal(result, expected)
 
 
-@pytest.mark.parametrize(
-    "throw1hash, throw2hash, throw1eq, throw2eq",
-    product([True, False], repeat=4),
-)
-def test_exceptions_thrown_from_custom_hash_and_eq_methods(
-    throw1hash, throw2hash, throw1eq, throw2eq
-):
+class testkey:
     # GH 57052
-    class testkey:
-        def __init__(self, value, throw_hash=False, throw_eq=False):
-            self.value = value
-            self.throw_hash = throw_hash
-            self.throw_eq = throw_eq
+    def __init__(self, value, throw_hash=False, throw_eq=False):
+        self.value = value
+        self.throw_hash = throw_hash
+        self.throw_eq = throw_eq
+
+    def __hash__(self):
+        if self.throw_hash:
+            raise RuntimeError(f"exception in {self!r}.__hash__")
+        return hash(self.value)
+
+    def __eq__(self, other):
+        if self.throw_eq:
+            raise RuntimeError(f"exception in {self!r}.__eq__")
+        return self.value == other.value
 
-        def __hash__(self):
-            if self.throw_hash:
-                raise RuntimeError(f"exception in {self!r}.__hash__")
-            return hash(self.value)
+    def __repr__(self):
+        return f"testkey({self.value}, {self.throw_hash}, {self.throw_eq})"
+
+
+@pytest.mark.parametrize("throw1, throw2", product([True, False], repeat=2))
+def test_error_raised_from_hash_method_in_set_item(throw1, throw2):
+    # GH 57052
+    table = ht.PyObjectHashTable()
+
+    key1 = testkey(value="hello1", throw_hash=throw1)
+    key2 = testkey(value="hello2", throw_hash=throw2)
+
+    if throw1:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key1!r}.__hash__")
+        ):
+            table.set_item(key1, 123)
+    else:
+        table.set_item(key1, 123)
+        assert table.get_item(key1) == 123
 
-        def __eq__(self, other):
-            if self.throw_eq:
-                raise RuntimeError(f"exception in {self!r}.__eq__")
-            return self.value == other.value
+    if throw2:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key2!r}.__hash__")
+        ):
+            table.set_item(key2, 456)
+    else:
+        table.set_item(key2, 456)
+        assert table.get_item(key2) == 456
 
-        def __repr__(self):
-            return f"testkey({self.value}, {self.throw_hash}, {self.throw_eq})"
 
+@pytest.mark.parametrize("throw1, throw2", product([True, False], repeat=2))
+def test_error_raised_from_hash_method_in_get_item(throw1, throw2):
+    # GH 57052
     table = ht.PyObjectHashTable()
 
     key1 = testkey(value="hello1")
@@ -818,43 +842,63 @@ def __repr__(self):
     table.set_item(key1, 123)
     table.set_item(key2, 456)
 
-    key1.throw_hash = throw1hash
-    key2.throw_hash = throw2hash
-    key1.throw_eq = throw1eq
-    key2.throw_eq = throw2eq
+    key1.throw_hash = throw1
+    key2.throw_hash = throw2
 
-    if throw1hash and throw1eq:
-        with pytest.raises(
-            RuntimeError, match=re.escape(f"exception in {key1!r}.") + "__(hash|eq)__"
-        ):
-            table.get_item(key1)
-    elif throw1hash:
+    if throw1:
         with pytest.raises(
             RuntimeError, match=re.escape(f"exception in {key1!r}.__hash__")
         ):
             table.get_item(key1)
-    elif throw1eq:
-        with pytest.raises(
-            RuntimeError, match=re.escape(f"exception in {key1!r}.__eq__")
-        ):
-            table.get_item(key1)
     else:
         assert table.get_item(key1) == 123
 
-    if throw2hash and throw2eq:
+    if throw2:
         with pytest.raises(
-            RuntimeError, match=re.escape(f"exception in {key2!r}.") + "__(hash|eq)__"
+            RuntimeError, match=re.escape(f"exception in {key2!r}.__hash__")
         ):
             table.get_item(key2)
-    elif throw2hash:
+    else:
+        assert table.get_item(key2) == 456
+
+
+@pytest.mark.parametrize("throw", [True, False])
+def test_error_raised_from_eq_method_in_set_item(throw):
+    # GH 57052
+    table = ht.PyObjectHashTable()
+
+    key1 = testkey(value="hello", throw_eq=throw)
+    key2 = testkey(value=key1.value)
+
+    if throw:
+        table.set_item(key1, 123)
         with pytest.raises(
-            RuntimeError, match=re.escape(f"exception in {key2!r}.__hash__")
+            RuntimeError, match=re.escape(f"exception in {key1!r}.__eq__")
         ):
-            table.get_item(key2)
-    elif throw2eq:
+            table.set_item(key2, 456)
+    else:
+        table.set_item(key2, 456)
+        assert table.get_item(key2) == 456
+
+
+@pytest.mark.parametrize("throw", [True, False])
+def test_error_raised_from_eq_method_in_get_item(throw):
+    # GH 57052
+    table = ht.PyObjectHashTable()
+
+    key1 = testkey(value="hello")
+    key2 = testkey(value=key1.value)
+
+    table.set_item(key1, 123)
+    table.set_item(key2, 456)
+
+    if throw:
+        key1.throw_eq = True
         with pytest.raises(
-            RuntimeError, match=re.escape(f"exception in {key2!r}.__eq__")
+            RuntimeError, match=re.escape(f"exception in {key1!r}.__eq__")
         ):
             table.get_item(key2)
     else:
+        # this looks odd but it is because key1.value == key2.value
+        assert table.get_item(key1) == 456
         assert table.get_item(key2) == 456

From a7994c85a59480113ea02dd934c7d73bb6a736ab Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Tue, 28 Oct 2025 23:27:27 -0400
Subject: [PATCH 07/16] raise exceptions from map_locations

---
 pandas/_libs/hashtable_class_helper.pxi.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index fd5b9cc8b7910..0925b8e66529d 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -1409,6 +1409,7 @@ cdef class PyObjectHashTable(HashTable):
             hash(val)
 
             k = kh_put_pymap(self.table, <PyObject*>val, &ret)
+            raise_if_errors()
             self.table.vals[k] = i
 
     def lookup(self, ndarray[object] values, object mask = None) -> ndarray:

From c25ab5b1a814553104eaed4d9f692bb3931e4933 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Thu, 30 Oct 2025 22:14:07 -0400
Subject: [PATCH 08/16] implement fully checked kh_pymap interface

---
 pandas/_libs/hashtable_class_helper.pxi.in    | 45 +++--------
 pandas/_libs/hashtable_func_helper.pxi.in     | 74 +++++++++---------
 .../pandas/vendored/klib/khash_python.h       |  6 ++
 pandas/_libs/khash.pxd                        |  8 ++
 pandas/_libs/khash.pyx                        | 75 +++++++++++++++++++
 pandas/_libs/meson.build                      |  1 +
 6 files changed, 138 insertions(+), 71 deletions(-)
 create mode 100644 pandas/_libs/khash.pyx

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 0925b8e66529d..05a697fcc0c2c 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -4,8 +4,6 @@ Template for each `dtype` helper function for hashtable
 WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 """
 from cpython.unicode cimport PyUnicode_AsUTF8
-from cpython.exc cimport PyErr_Occurred, PyErr_Fetch
-from cpython.ref cimport Py_XDECREF
 
 {{py:
 
@@ -75,7 +73,7 @@ cimported_types = ['complex64',
                    'int16',
                    'int32',
                    'int64',
-                   'pymap',
+                   'pymap_checked',
                    'str',
                    'strbox',
                    'uint8',
@@ -1311,32 +1309,16 @@ cdef class StringHashTable(HashTable):
         return labels
 
 
-cdef raise_if_errors():
-    cdef:
-        object exc
-        PyObject *type
-        PyObject *value
-        PyObject *traceback
-
-    PyErr_Fetch(&type, &value, &traceback)
-    if value != NULL:
-        exc = <object>value
-        Py_XDECREF(value)
-        Py_XDECREF(type)
-        Py_XDECREF(traceback)
-        raise exc
-
-
 cdef class PyObjectHashTable(HashTable):
 
     def __init__(self, int64_t size_hint=1):
-        self.table = kh_init_pymap()
+        self.table = kh_init_pymap_checked()
         size_hint = min(kh_needed_n_buckets(size_hint), SIZE_HINT_LIMIT)
-        kh_resize_pymap(self.table, size_hint)
+        kh_resize_pymap_checked(self.table, size_hint)
 
     def __dealloc__(self):
         if self.table is not NULL:
-            kh_destroy_pymap(self.table)
+            kh_destroy_pymap_checked(self.table)
             self.table = NULL
 
     def __len__(self) -> int:
@@ -1347,7 +1329,7 @@ cdef class PyObjectHashTable(HashTable):
             khiter_t k
         hash(key)
 
-        k = kh_get_pymap(self.table, <PyObject*>key)
+        k = kh_get_pymap_checked(self.table, <PyObject*>key)
         return k != self.table.n_buckets
 
     def sizeof(self, deep: bool = False) -> int:
@@ -1374,8 +1356,7 @@ cdef class PyObjectHashTable(HashTable):
         cdef:
             khiter_t k
 
-        k = kh_get_pymap(self.table, <PyObject*>val)
-        raise_if_errors()
+        k = kh_get_pymap_checked(self.table, <PyObject*>val)
         if k != self.table.n_buckets:
             return self.table.vals[k]
         else:
@@ -1389,9 +1370,8 @@ cdef class PyObjectHashTable(HashTable):
 
         hash(key)
 
-        k = kh_put_pymap(self.table, <PyObject*>key, &ret)
-        raise_if_errors()
-        if kh_exist_pymap(self.table, k):
+        k = kh_put_pymap_checked(self.table, <PyObject*>key, &ret)
+        if kh_exist_pymap_checked(self.table, k):
             self.table.vals[k] = val
         else:
             raise KeyError(key)
@@ -1408,8 +1388,7 @@ cdef class PyObjectHashTable(HashTable):
             val = values[i]
             hash(val)
 
-            k = kh_put_pymap(self.table, <PyObject*>val, &ret)
-            raise_if_errors()
+            k = kh_put_pymap_checked(self.table, <PyObject*>val, &ret)
             self.table.vals[k] = i
 
     def lookup(self, ndarray[object] values, object mask = None) -> ndarray:
@@ -1426,7 +1405,7 @@ cdef class PyObjectHashTable(HashTable):
             val = values[i]
             hash(val)
 
-            k = kh_get_pymap(self.table, <PyObject*>val)
+            k = kh_get_pymap_checked(self.table, <PyObject*>val)
             if k != self.table.n_buckets:
                 locs[i] = self.table.vals[k]
             else:
@@ -1504,10 +1483,10 @@ cdef class PyObjectHashTable(HashTable):
                 labels[i] = na_sentinel
                 continue
 
-            k = kh_get_pymap(self.table, <PyObject*>val)
+            k = kh_get_pymap_checked(self.table, <PyObject*>val)
             if k == self.table.n_buckets:
                 # k hasn't been seen yet
-                k = kh_put_pymap(self.table, <PyObject*>val, &ret)
+                k = kh_put_pymap_checked(self.table, <PyObject*>val, &ret)
                 uniques.append(val)
                 if return_inverse:
                     self.table.vals[k] = count
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 3487f5ebd050d..6ffeff9b5f3ab 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -6,26 +6,24 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# name, dtype, ttype, c_type, to_c_type
-dtypes = [('Complex128', 'complex128', 'complex128',
-                         'khcomplex128_t', 'to_khcomplex128_t'),
-          ('Complex64', 'complex64', 'complex64',
-                        'khcomplex64_t', 'to_khcomplex64_t'),
-          ('Float64', 'float64', 'float64', 'float64_t', ''),
-          ('Float32', 'float32', 'float32', 'float32_t', ''),
-          ('UInt64', 'uint64', 'uint64', 'uint64_t', ''),
-          ('UInt32', 'uint32', 'uint32', 'uint32_t', ''),
-          ('UInt16', 'uint16', 'uint16', 'uint16_t', ''),
-          ('UInt8', 'uint8', 'uint8', 'uint8_t', ''),
-          ('Object', 'object', 'pymap', 'object', '<PyObject*>'),
-          ('Int64', 'int64', 'int64', 'int64_t', ''),
-          ('Int32', 'int32', 'int32', 'int32_t', ''),
-          ('Int16', 'int16', 'int16', 'int16_t', ''),
-          ('Int8', 'int8', 'int8', 'int8_t', '')]
+# name, dtype, ttype, tfunc_type, c_type, to_c_type
+dtypes = [('Complex128', 'complex128', 'complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'),
+          ('Complex64', 'complex64', 'complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'),
+          ('Float64', 'float64', 'float64', 'float64', 'float64_t', ''),
+          ('Float32', 'float32', 'float32', 'float32', 'float32_t', ''),
+          ('UInt64', 'uint64', 'uint64', 'uint64', 'uint64_t', ''),
+          ('UInt32', 'uint32', 'uint32', 'uint32', 'uint32_t', ''),
+          ('UInt16', 'uint16', 'uint16', 'uint16', 'uint16_t', ''),
+          ('UInt8', 'uint8', 'uint8', 'uint8', 'uint8_t', ''),
+          ('Object', 'object', 'pymap', 'pymap_checked', 'object', '<PyObject*>'),
+          ('Int64', 'int64', 'int64', 'int64', 'int64_t', ''),
+          ('Int32', 'int32', 'int32', 'int32', 'int32_t', ''),
+          ('Int16', 'int16', 'int16', 'int16', 'int16_t', ''),
+          ('Int8', 'int8', 'int8', 'int8', 'int8_t', '')]
 
 }}
 
-{{for name, dtype, ttype, c_type, to_c_type in dtypes}}
+{{for name, dtype, ttype, tfunc_type, c_type, to_c_type in dtypes}}
 
 
 @cython.wraparound(False)
@@ -55,26 +53,26 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
     #    result_keys remembers the original order of keys
 
     result_keys = {{name}}Vector()
-    table = kh_init_{{ttype}}()
+    table = kh_init_{{tfunc_type}}()
 
     {{if dtype == 'object'}}
     if uses_mask:
         raise NotImplementedError("uses_mask not implemented with object dtype")
 
-    kh_resize_{{ttype}}(table, n // 10)
+    kh_resize_{{tfunc_type}}(table, n // 10)
 
     for i in range(n):
         val = values[i]
         if not dropna or not checknull(val):
-            k = kh_get_{{ttype}}(table, {{to_c_type}}val)
+            k = kh_get_{{tfunc_type}}(table, {{to_c_type}}val)
             if k != table.n_buckets:
                 table.vals[k] += 1
             else:
-                k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret)
+                k = kh_put_{{tfunc_type}}(table, {{to_c_type}}val, &ret)
                 table.vals[k] = 1
                 result_keys.append(val)
     {{else}}
-    kh_resize_{{ttype}}(table, n)
+    kh_resize_{{tfunc_type}}(table, n)
 
     for i in range(n):
         val = {{to_c_type}}(values[i])
@@ -90,11 +88,11 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
             if uses_mask and isna_entry:
                 na_counter += 1
             else:
-                k = kh_get_{{ttype}}(table, val)
+                k = kh_get_{{tfunc_type}}(table, val)
                 if k != table.n_buckets:
                     table.vals[k] += 1
                 else:
-                    k = kh_put_{{ttype}}(table, val, &ret)
+                    k = kh_put_{{tfunc_type}}(table, val, &ret)
                     table.vals[k] = 1
                     result_keys.append(val)
     {{endif}}
@@ -107,9 +105,9 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
 
     for i in range(table.size):
         {{if dtype == 'object'}}
-        k = kh_get_{{ttype}}(table, result_keys.data[i])
+        k = kh_get_{{tfunc_type}}(table, result_keys.data[i])
         {{else}}
-        k = kh_get_{{ttype}}(table, result_keys.data.data[i])
+        k = kh_get_{{tfunc_type}}(table, result_keys.data.data[i])
         {{endif}}
         result_counts[i] = table.vals[k]
 
@@ -117,7 +115,7 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
         result_counts[table.size] = na_counter
         result_keys.append(val)
 
-    kh_destroy_{{ttype}}(table)
+    kh_destroy_{{tfunc_type}}(table)
 
     return result_keys.to_array(), result_counts.base, na_counter
 
@@ -138,12 +136,12 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
         {{endif}}
         Py_ssize_t i, n = len(values), first_na = -1
         khiter_t k
-        kh_{{ttype}}_t *table = kh_init_{{ttype}}()
+        kh_{{ttype}}_t *table = kh_init_{{tfunc_type}}()
         ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')
         bint seen_na = False, uses_mask = mask is not None
         bint seen_multiple_na = False
 
-    kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))
+    kh_resize_{{tfunc_type}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))
 
     if keep not in ('last', 'first', False):
         raise ValueError('keep must be either "first", "last" or False')
@@ -168,7 +166,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
                         seen_na = True
                 else:
                     value = {{to_c_type}}(values[i])
-                    kh_put_{{ttype}}(table, value, &ret)
+                    kh_put_{{tfunc_type}}(table, value, &ret)
                     out[i] = ret == 0
     {{endfor}}
 
@@ -193,16 +191,16 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
 
                 else:
                     value = {{to_c_type}}(values[i])
-                    k = kh_get_{{ttype}}(table, value)
+                    k = kh_get_{{tfunc_type}}(table, value)
                     if k != table.n_buckets:
                         out[table.vals[k]] = 1
                         out[i] = 1
                     else:
-                        k = kh_put_{{ttype}}(table, value, &ret)
+                        k = kh_put_{{tfunc_type}}(table, value, &ret)
                         table.vals[k] = i
                         out[i] = 0
 
-    kh_destroy_{{ttype}}(table)
+    kh_destroy_{{tfunc_type}}(table)
     return out
 
 
@@ -243,11 +241,11 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
         {{c_type}} val
         {{endif}}
 
-        kh_{{ttype}}_t *table = kh_init_{{ttype}}()
+        kh_{{ttype}}_t *table = kh_init_{{tfunc_type}}()
 
     # construct the table
     n = len(values)
-    kh_resize_{{ttype}}(table, n)
+    kh_resize_{{tfunc_type}}(table, n)
 
     {{if dtype == 'object'}}
     if True:
@@ -256,7 +254,7 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
     {{endif}}
         for i in range(n):
             val = {{to_c_type}}(values[i])
-            kh_put_{{ttype}}(table, val, &ret)
+            kh_put_{{tfunc_type}}(table, val, &ret)
 
     # test membership
     n = len(arr)
@@ -269,10 +267,10 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
     {{endif}}
         for i in range(n):
             val = {{to_c_type}}(arr[i])
-            k = kh_get_{{ttype}}(table, val)
+            k = kh_get_{{tfunc_type}}(table, val)
             result[i] = (k != table.n_buckets)
 
-    kh_destroy_{{ttype}}(table)
+    kh_destroy_{{tfunc_type}}(table)
     return result.view(np.bool_)
 
 # ----------------------------------------------------------------------
diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index fa1ea1430f917..45e44dc66d135 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -218,6 +218,9 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
 
   int result = PyObject_RichCompareBool(a, b, Py_EQ);
   if (result < 0) {
+    if (PyErr_Occurred() != NULL) {
+      return 0;
+    }
     return 0;
   }
   return result;
@@ -320,6 +323,9 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
   }
 
   if (hash == -1) {
+    if (PyErr_Occurred() != NULL) {
+      return 0;
+    }
     return 0;
   }
 #if SIZEOF_PY_HASH_T == 4
diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd
index c439e1cca772b..f8c30eec113f7 100644
--- a/pandas/_libs/khash.pxd
+++ b/pandas/_libs/khash.pxd
@@ -125,5 +125,13 @@ cdef extern from "pandas/vendored/klib/khash_python.h":
 
     khuint_t kh_needed_n_buckets(khuint_t element_n) nogil
 
+cdef kh_pymap_t* kh_init_pymap_checked()  # *_checked: defined in khash.pyx
+cdef void kh_destroy_pymap_checked(kh_pymap_t*)
+cdef void kh_clear_pymap_checked(kh_pymap_t*)
+cdef khuint_t kh_get_pymap_checked(kh_pymap_t*, PyObject*)
+cdef void kh_resize_pymap_checked(kh_pymap_t*, khuint_t)
+cdef khuint_t kh_put_pymap_checked(kh_pymap_t*, PyObject*, int*)
+cdef void kh_del_pymap_checked(kh_pymap_t*, khuint_t)
+cdef bint kh_exist_pymap_checked(kh_pymap_t*, khiter_t)
 
 include "khash_for_primitive_helper.pxi"
diff --git a/pandas/_libs/khash.pyx b/pandas/_libs/khash.pyx
new file mode 100644
index 0000000000000..2c4d37d6f1e8a
--- /dev/null
+++ b/pandas/_libs/khash.pyx
@@ -0,0 +1,75 @@
+from cpython.object cimport PyObject
+from cpython.exc cimport PyErr_Occurred, PyErr_Fetch
+from cpython.ref cimport Py_XDECREF
+
+
+cdef inline raise_if_errors():
+    # Re-raise any Python error left pending by a preceding khash call (GH 57052).
+    cdef:
+        object exc_type = RuntimeError  # fallback when no type was fetched
+        object exc_value = None
+        PyObject *err_type
+        PyObject *err_value
+        PyObject *err_tb
+
+    if not PyErr_Occurred():
+        return
+    # PyErr_Fetch clears the indicator and hands over one reference per item.
+    PyErr_Fetch(&err_type, &err_value, &err_tb)
+    if err_type != NULL:
+        exc_type = <object>err_type
+    if err_value != NULL:
+        exc_value = <object>err_value
+    Py_XDECREF(err_type)  # the <object> casts above took their own references
+    Py_XDECREF(err_value)
+    Py_XDECREF(err_tb)
+    if isinstance(exc_value, BaseException):
+        raise exc_value
+    raise exc_type() if exc_value is None else exc_type(exc_value)
+
+
+cdef kh_pymap_t* kh_init_pymap_checked():
+    cdef kh_pymap_t* table = kh_init_pymap()
+    if PyErr_Occurred():
+        kh_destroy_pymap(table)  # do not leak the table when an error is pending
+        table = NULL
+    raise_if_errors()  # raises here if a Python error is pending
+    return table
+
+
+cdef void kh_destroy_pymap_checked(kh_pymap_t* table):
+    kh_destroy_pymap(table)
+    raise_if_errors()  # surface any Python error pending after the khash call
+
+
+cdef void kh_clear_pymap_checked(kh_pymap_t* table):
+    kh_clear_pymap(table)
+    raise_if_errors()  # surface any Python error pending after the khash call
+
+
+cdef khuint_t kh_get_pymap_checked(kh_pymap_t* table, PyObject* key):
+    cdef khuint_t k = kh_get_pymap(table, key)
+    raise_if_errors()  # the lookup result is only returned if no error is pending
+    return k
+
+
+cdef void kh_resize_pymap_checked(kh_pymap_t* table, khuint_t new_n_buckets):
+    kh_resize_pymap(table, new_n_buckets)
+    raise_if_errors()  # surface any Python error pending after the khash call
+
+
+cdef khuint_t kh_put_pymap_checked(kh_pymap_t* table, PyObject* key, int* ret):
+    cdef khuint_t result = kh_put_pymap(table, key, ret)
+    raise_if_errors()  # the insert result is only returned if no error is pending
+    return result
+
+
+cdef void kh_del_pymap_checked(kh_pymap_t* table, khuint_t k):
+    kh_del_pymap(table, k)
+    raise_if_errors()  # surface any Python error pending after the khash call
+
+
+cdef bint kh_exist_pymap_checked(kh_pymap_t* table, khiter_t k):
+    cdef bint res = kh_exist_pymap(table, k)
+    raise_if_errors()  # the flag is only returned if no error is pending
+    return res
diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build
index 33fc65e5034d0..f6d6e74648cf3 100644
--- a/pandas/_libs/meson.build
+++ b/pandas/_libs/meson.build
@@ -97,6 +97,7 @@ libs_sources = {
         'sources': ['join.pyx', _khash_primitive_helper],
         'deps': _khash_primitive_helper_dep,
     },
+    'khash': {'sources': ['khash.pyx']},
     'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']},
     'missing': {'sources': ['missing.pyx']},
     'pandas_datetime': {

From c87a388696a955f2da603327aadc1043d861645d Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Thu, 30 Oct 2025 22:16:54 -0400
Subject: [PATCH 09/16] isort

---
 pandas/_libs/khash.pyx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/khash.pyx b/pandas/_libs/khash.pyx
index 2c4d37d6f1e8a..c2b684100131f 100644
--- a/pandas/_libs/khash.pyx
+++ b/pandas/_libs/khash.pyx
@@ -1,5 +1,8 @@
+from cpython.exc cimport (
+    PyErr_Fetch,
+    PyErr_Occurred,
+)
 from cpython.object cimport PyObject
-from cpython.exc cimport PyErr_Occurred, PyErr_Fetch
 from cpython.ref cimport Py_XDECREF
 
 

From 0a4cba88755393afedefc20586651258056ec1bf Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Thu, 30 Oct 2025 22:22:22 -0400
Subject: [PATCH 10/16] cleanup

---
 pandas/_libs/hashtable_func_helper.pxi.in     | 46 +++++++++----------
 .../pandas/vendored/klib/khash_python.h       |  7 ---
 2 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index 6ffeff9b5f3ab..0722fb65218b8 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -6,7 +6,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# name, dtype, ttype, tfunc_type, c_type, to_c_type
+# name, dtype, ttype, tfunc, c_type, to_c_type
 dtypes = [('Complex128', 'complex128', 'complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'),
           ('Complex64', 'complex64', 'complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'),
           ('Float64', 'float64', 'float64', 'float64', 'float64_t', ''),
@@ -23,7 +23,7 @@ dtypes = [('Complex128', 'complex128', 'complex128', 'complex128', 'khcomplex128
 
 }}
 
-{{for name, dtype, ttype, tfunc_type, c_type, to_c_type in dtypes}}
+{{for name, dtype, ttype, tfunc, c_type, to_c_type in dtypes}}
 
 
 @cython.wraparound(False)
@@ -53,26 +53,26 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
     #    result_keys remembers the original order of keys
 
     result_keys = {{name}}Vector()
-    table = kh_init_{{tfunc_type}}()
+    table = kh_init_{{tfunc}}()
 
     {{if dtype == 'object'}}
     if uses_mask:
         raise NotImplementedError("uses_mask not implemented with object dtype")
 
-    kh_resize_{{tfunc_type}}(table, n // 10)
+    kh_resize_{{tfunc}}(table, n // 10)
 
     for i in range(n):
         val = values[i]
         if not dropna or not checknull(val):
-            k = kh_get_{{tfunc_type}}(table, {{to_c_type}}val)
+            k = kh_get_{{tfunc}}(table, {{to_c_type}}val)
             if k != table.n_buckets:
                 table.vals[k] += 1
             else:
-                k = kh_put_{{tfunc_type}}(table, {{to_c_type}}val, &ret)
+                k = kh_put_{{tfunc}}(table, {{to_c_type}}val, &ret)
                 table.vals[k] = 1
                 result_keys.append(val)
     {{else}}
-    kh_resize_{{tfunc_type}}(table, n)
+    kh_resize_{{tfunc}}(table, n)
 
     for i in range(n):
         val = {{to_c_type}}(values[i])
@@ -88,11 +88,11 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
             if uses_mask and isna_entry:
                 na_counter += 1
             else:
-                k = kh_get_{{tfunc_type}}(table, val)
+                k = kh_get_{{tfunc}}(table, val)
                 if k != table.n_buckets:
                     table.vals[k] += 1
                 else:
-                    k = kh_put_{{tfunc_type}}(table, val, &ret)
+                    k = kh_put_{{tfunc}}(table, val, &ret)
                     table.vals[k] = 1
                     result_keys.append(val)
     {{endif}}
@@ -105,9 +105,9 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
 
     for i in range(table.size):
         {{if dtype == 'object'}}
-        k = kh_get_{{tfunc_type}}(table, result_keys.data[i])
+        k = kh_get_{{tfunc}}(table, result_keys.data[i])
         {{else}}
-        k = kh_get_{{tfunc_type}}(table, result_keys.data.data[i])
+        k = kh_get_{{tfunc}}(table, result_keys.data.data[i])
         {{endif}}
         result_counts[i] = table.vals[k]
 
@@ -115,7 +115,7 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
         result_counts[table.size] = na_counter
         result_keys.append(val)
 
-    kh_destroy_{{tfunc_type}}(table)
+    kh_destroy_{{tfunc}}(table)
 
     return result_keys.to_array(), result_counts.base, na_counter
 
@@ -136,12 +136,12 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
         {{endif}}
         Py_ssize_t i, n = len(values), first_na = -1
         khiter_t k
-        kh_{{ttype}}_t *table = kh_init_{{tfunc_type}}()
+        kh_{{ttype}}_t *table = kh_init_{{tfunc}}()
         ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')
         bint seen_na = False, uses_mask = mask is not None
         bint seen_multiple_na = False
 
-    kh_resize_{{tfunc_type}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))
+    kh_resize_{{tfunc}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))
 
     if keep not in ('last', 'first', False):
         raise ValueError('keep must be either "first", "last" or False')
@@ -166,7 +166,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
                         seen_na = True
                 else:
                     value = {{to_c_type}}(values[i])
-                    kh_put_{{tfunc_type}}(table, value, &ret)
+                    kh_put_{{tfunc}}(table, value, &ret)
                     out[i] = ret == 0
     {{endfor}}
 
@@ -191,16 +191,16 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
 
                 else:
                     value = {{to_c_type}}(values[i])
-                    k = kh_get_{{tfunc_type}}(table, value)
+                    k = kh_get_{{tfunc}}(table, value)
                     if k != table.n_buckets:
                         out[table.vals[k]] = 1
                         out[i] = 1
                     else:
-                        k = kh_put_{{tfunc_type}}(table, value, &ret)
+                        k = kh_put_{{tfunc}}(table, value, &ret)
                         table.vals[k] = i
                         out[i] = 0
 
-    kh_destroy_{{tfunc_type}}(table)
+    kh_destroy_{{tfunc}}(table)
     return out
 
 
@@ -241,11 +241,11 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
         {{c_type}} val
         {{endif}}
 
-        kh_{{ttype}}_t *table = kh_init_{{tfunc_type}}()
+        kh_{{ttype}}_t *table = kh_init_{{tfunc}}()
 
     # construct the table
     n = len(values)
-    kh_resize_{{tfunc_type}}(table, n)
+    kh_resize_{{tfunc}}(table, n)
 
     {{if dtype == 'object'}}
     if True:
@@ -254,7 +254,7 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
     {{endif}}
         for i in range(n):
             val = {{to_c_type}}(values[i])
-            kh_put_{{tfunc_type}}(table, val, &ret)
+            kh_put_{{tfunc}}(table, val, &ret)
 
     # test membership
     n = len(arr)
@@ -267,10 +267,10 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
     {{endif}}
         for i in range(n):
             val = {{to_c_type}}(arr[i])
-            k = kh_get_{{tfunc_type}}(table, val)
+            k = kh_get_{{tfunc}}(table, val)
             result[i] = (k != table.n_buckets)
 
-    kh_destroy_{{tfunc_type}}(table)
+    kh_destroy_{{tfunc}}(table)
     return result.view(np.bool_)
 
 # ----------------------------------------------------------------------
diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index 45e44dc66d135..a18d41f6a0249 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -3,7 +3,6 @@
 #pragma once
 
 #include <Python.h>
-
 #include <pymem.h>
 #include <string.h>
 
@@ -218,9 +217,6 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
 
   int result = PyObject_RichCompareBool(a, b, Py_EQ);
   if (result < 0) {
-    if (PyErr_Occurred() != NULL) {
-      return 0;
-    }
     return 0;
   }
   return result;
@@ -323,9 +319,6 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
   }
 
   if (hash == -1) {
-    if (PyErr_Occurred() != NULL) {
-      return 0;
-    }
     return 0;
   }
 #if SIZEOF_PY_HASH_T == 4

From ac99ea6f6a16b30c0cd3340d022e8d5ac7463f1c Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Sat, 1 Nov 2025 18:35:05 -0400
Subject: [PATCH 11/16] hash dict/list as 0, do not compare NA

---
 .../include/pandas/vendored/klib/khash_python.h   | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index a18d41f6a0249..53a3a9da7ce08 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -191,6 +191,12 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) {
   return 1;
 }
 
+static inline int _is_pandas_NA_type(PyObject *o) {
+  // TODO compare PyTypeObject* C_NA, not strings!
+  PyObject* type_name = PyType_GetName(Py_TYPE(o));
+  return PyUnicode_CompareWithASCIIString(type_name, "NAType") == 0;
+}
+
 static inline int pyobject_cmp(PyObject *a, PyObject *b) {
   if (PyErr_Occurred() != NULL) {
     return 0;
@@ -213,6 +219,8 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
       return tupleobject_cmp((PyTupleObject *)a, (PyTupleObject *)b);
     }
     // frozenset isn't yet supported
+  } else if (_is_pandas_NA_type(a) || _is_pandas_NA_type(b)) {
+    return 0;
   }
 
   int result = PyObject_RichCompareBool(a, b, Py_EQ);
@@ -314,6 +322,13 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
   } else if (PyTuple_Check(key)) {
     // hash tuple subclasses as builtin tuples
     hash = tupleobject_hash((PyTupleObject *)key);
+  } else if (PyDict_Check(key) || PyList_Check(key)) {
+    // before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
+    // some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via DataFrame.describe,
+    // which counts generic objects using PyObjectHashTable.
+    // using hash = 0 for dict and list objects puts all of them in the same bucket,
+    // which is not optimal for performance but that is what the behaviour was before.
+    hash = 0;
   } else {
     hash = PyObject_Hash(key);
   }

From eb120e9063fbf77f6b31d32ba144628a659486ea Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Sat, 1 Nov 2025 18:54:30 -0400
Subject: [PATCH 12/16] fix precommit

---
 pandas/_libs/include/pandas/vendored/klib/khash_python.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index 53a3a9da7ce08..bc1cc30ed7905 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -191,7 +191,7 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) {
   return 1;
 }
 
-static inline int _is_pandas_NA_type(PyObject *o) {
+static inline int _is_pandas_NA_type(PyObject* o) {
   // TODO compare PyTypeObject* C_NA, not strings!
   PyObject* type_name = PyType_GetName(Py_TYPE(o));
   return PyUnicode_CompareWithASCIIString(type_name, "NAType") == 0;

From 2b0fa82703102dcb621696731997e750eb599888 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Sat, 1 Nov 2025 21:07:51 -0400
Subject: [PATCH 13/16] stop using hash=0 for list, cleanup comment

---
 .../_libs/include/pandas/vendored/klib/khash_python.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index bc1cc30ed7905..8c46b2e5bafcf 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -322,12 +322,11 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
   } else if (PyTuple_Check(key)) {
     // hash tuple subclasses as builtin tuples
     hash = tupleobject_hash((PyTupleObject *)key);
-  } else if (PyDict_Check(key) || PyList_Check(key)) {
-    // before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
-    // some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via DataFrame.describe,
-    // which counts generic objects using PyObjectHashTable.
-    // using hash = 0 for dict and list objects puts all of them in the same bucket,
-    // which is not optimal for performance but that is what the behaviour was before.
+  } else if (PyDict_Check(key)) {
+    // Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
+    // some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via
+    // DataFrame.describe, which counts generic objects using PyObjectHashTable.
+    // Using hash = 0 puts all objects in the same bucket, which is bad for performance but that is how it worked before.
     hash = 0;
   } else {
     hash = PyObject_Hash(key);

From c94b480e8754599865e7975e48ed523bd27a4296 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Sat, 1 Nov 2025 21:08:43 -0400
Subject: [PATCH 14/16] fix unhashable UserDict in JSONArray.duplicated

---
 pandas/tests/extension/json/array.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 828e4415bd295..21736b24eb35e 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -25,6 +25,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Literal,
 )
 
 import numpy as np
@@ -41,12 +42,16 @@
     ExtensionArray,
     ExtensionDtype,
 )
+from pandas.core.algorithms import duplicated
 from pandas.core.indexers import unpack_tuple_and_ellipses
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
 
-    from pandas._typing import type_t
+    from pandas._typing import (
+        npt,
+        type_t,
+    )
 
 
 class JSONDtype(ExtensionDtype):
@@ -254,6 +259,17 @@ def _pad_or_backfill(self, *, method, limit=None, copy=True):
         # GH#56616 - test EA method without limit_area argument
         return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
 
+    def duplicated(
+        self, keep: Literal["first", "last", False] = "first"
+    ) -> npt.NDArray[np.bool_]:
+        # pd.core.algorithms.duplicated is implemented with a hash table that
+        # does not support UserDict values.
+        # However, dict values are always hashed as 0 for backwards compatibility,
+        # see GH 57052
+        mask = self.isna().astype(np.bool_, copy=False)
+        values = np.array([dict(x) for x in self], dtype="object")
+        return duplicated(values=values, keep=keep, mask=mask)
+
 
 def make_data(n: int):
     # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer

From 2462e30f2715650d165d1de7f232894d4fef1417 Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Sat, 1 Nov 2025 21:28:50 -0400
Subject: [PATCH 15/16] hash list as 0 again, Series.isin hashes lists

---
 .../_libs/include/pandas/vendored/klib/khash_python.h  | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index 8c46b2e5bafcf..505126f61e421 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -322,11 +322,13 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
   } else if (PyTuple_Check(key)) {
     // hash tuple subclasses as builtin tuples
     hash = tupleobject_hash((PyTupleObject *)key);
-  } else if (PyDict_Check(key)) {
+  } else if (PyDict_Check(key) || PyList_Check(key)) {
     // Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
-    // some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via
-    // DataFrame.describe, which counts generic objects using PyObjectHashTable.
-    // Using hash = 0 puts all objects in the same bucket, which is bad for performance but that is how it worked before.
+    // Existing code that relies on this behaviour is for example:
+    //   * _libs.hashtable.value_count_object via DataFrame.describe
+    //   * _libs.hashtable.ismember_object via Series.isin
+    // Using hash = 0 puts all dict and list objects in the same bucket,
+    // which is bad for performance but that is how it worked before.
     hash = 0;
   } else {
     hash = PyObject_Hash(key);

From 0fc752e86946e96287a0d02ab382d45ec1d03d8d Mon Sep 17 00:00:00 2001
From: Matias Lindgren <matias.lindgren@iki.fi>
Date: Mon, 3 Nov 2025 22:32:00 -0500
Subject: [PATCH 16/16] fix precommit

---
 pandas/_libs/include/pandas/vendored/klib/khash_python.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
index 505126f61e421..e8abc4415dbe6 100644
--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h
+++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h
@@ -191,9 +191,9 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) {
   return 1;
 }
 
-static inline int _is_pandas_NA_type(PyObject* o) {
+static inline int _is_pandas_NA_type(PyObject *o) {
   // TODO compare PyTypeObject* C_NA, not strings!
-  PyObject* type_name = PyType_GetName(Py_TYPE(o));
+  PyObject *type_name = PyType_GetName(Py_TYPE(o));
   return PyUnicode_CompareWithASCIIString(type_name, "NAType") == 0;
 }
 
@@ -323,8 +323,8 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
     // hash tuple subclasses as builtin tuples
     hash = tupleobject_hash((PyTupleObject *)key);
   } else if (PyDict_Check(key) || PyList_Check(key)) {
-    // Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
-    // Existing code that relies on this behaviour is for example:
+    // Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were
+    // suppressed. Existing code that relies on this behaviour is for example:
     //   * _libs.hashtable.value_count_object via DataFrame.describe
     //   * _libs.hashtable.ismember_object via Series.isin
     // Using hash = 0 puts all dict and list objects in the same bucket,