Skip to content

Commit d2731d2

Browse files
committed
get_labels_groupby is only used with Int64HashTable, so it is now defined only for int64. What should happen for other types depends on how the method would be used there (encoding NaNs as negative values is not possible for every type), and it currently has no such use.
1 parent fda1fca commit d2731d2

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,7 @@ cdef class {{name}}HashTable(HashTable):
681681
ignore_na=True, return_inverse=True)
682682
return labels
683683

684+
{{if dtype == 'int64'}}
684685
@cython.boundscheck(False)
685686
def get_labels_groupby(self, const {{dtype}}_t[:] values):
686687
cdef:
@@ -701,16 +702,9 @@ cdef class {{name}}HashTable(HashTable):
701702
val = {{to_c_type}}(values[i])
702703

703704
# specific for groupby
704-
{{if dtype == 'complex64' or dtype== 'complex128'}}
705-
# TODO: what should be done here?
706-
if val.real < 0:
707-
labels[i] = -1
708-
continue
709-
{{elif dtype != 'uint64'}}
710705
if val < 0:
711706
labels[i] = -1
712707
continue
713-
{{endif}}
714708

715709
k = kh_get_{{dtype}}(self.table, val)
716710
if k != self.table.n_buckets:
@@ -730,6 +724,7 @@ cdef class {{name}}HashTable(HashTable):
730724
arr_uniques = uniques.to_array()
731725

732726
return np.asarray(labels), arr_uniques
727+
{{endif}}
733728

734729
{{endfor}}
735730

pandas/tests/libs/test_hashtable.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,16 @@ def test_tracemalloc_for_empty(self, table_type, dtype):
156156
assert get_allocated_khash_memory() == 0
157157

158158

159+
def test_get_labels_groupby_for_Int64():
160+
table = ht.Int64HashTable()
161+
vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)
162+
arr, unique = table.get_labels_groupby(vals)
163+
expected_arr = np.array([0, 1, -1, 1, 0, -1], dtype=np.int64)
164+
expected_unique = np.array([1, 2], dtype=np.int64)
165+
tm.assert_numpy_array_equal(arr, expected_arr)
166+
tm.assert_numpy_array_equal(unique, expected_unique)
167+
168+
159169
def test_tracemalloc_works_for_StringHashTable():
160170
N = 1000
161171
keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)

0 commit comments

Comments
 (0)