Skip to content

Commit 861c211

Browse files
committed
make hash and equal functions accessible from pure python
1 parent b2ecad5 commit 861c211

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

pandas/_libs/hashtable.pyx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ from pandas._libs.khash cimport (
3434
are_equivalent_khcomplex64_t,
3535
are_equivalent_khcomplex128_t,
3636
kh_needed_n_buckets,
37+
kh_python_hash_equal,
38+
kh_python_hash_func,
3739
kh_str_t,
3840
khcomplex64_t,
3941
khcomplex128_t,
@@ -46,6 +48,14 @@ def get_hashtable_trace_domain():
4648
return KHASH_TRACE_DOMAIN
4749

4850

51+
# Python-callable wrapper around ``kh_python_hash_func`` (cimported from
# pandas._libs.khash), so that pure-Python code can obtain the hash khash
# computes for ``obj``. The C function is declared in khash.pxd as returning
# ``uint32_t``; that value is returned to the caller unchanged.
def object_hash(obj):
52+
return kh_python_hash_func(obj)
53+
54+
55+
# Python-callable wrapper around ``kh_python_hash_equal`` (cimported from
# pandas._libs.khash), so that pure-Python code can ask whether khash treats
# ``a`` and ``b`` as the same key. The C function is declared in khash.pxd as
# returning ``bint``; that result is returned to the caller unchanged.
def objects_are_equal(a, b):
60+
return kh_python_hash_equal(a, b)
57+
58+
4959
cdef int64_t NPY_NAT = util.get_nat()
5060
SIZE_HINT_LIMIT = (1 << 20) + 7
5161

pandas/_libs/khash.pxd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ cdef extern from "khash_python.h":
4141
bint are_equivalent_float32_t \
4242
"kh_floats_hash_equal" (float32_t a, float32_t b) nogil
4343

44+
uint32_t kh_python_hash_func(object key)
45+
bint kh_python_hash_equal(object a, object b)
46+
4447
ctypedef struct kh_pymap_t:
4548
khuint_t n_buckets, size, n_occupied, upper_bound
4649
uint32_t *flags

pandas/tests/libs/test_hashtable.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ def test_nan_in_nested_tuple(self):
240240
assert str(error.value) == str(other)
241241

242242

243+
# Checks the hash/equality helpers exposed by the hashtable module on tuples
# that contain NaN both at the top level and inside a nested tuple.
def test_hash_equal_tuple_with_nans():
244+
# Each float("nan") call builds a new NaN object, so ``a`` and ``b`` share
# structure but not identity; plain ``a == b`` would be False here.
a = (float("nan"), (float("nan"), float("nan")))
245+
b = (float("nan"), (float("nan"), float("nan")))
246+
# The helpers are expected to give both tuples the same hash ...
assert ht.object_hash(a) == ht.object_hash(b)
247+
# ... and to report them as equal.
assert ht.objects_are_equal(a, b)
248+
249+
243250
def test_get_labels_groupby_for_Int64(writable):
244251
table = ht.Int64HashTable()
245252
vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)

0 commit comments

Comments
 (0)