Skip to content

Commit 27bfa14

Browse files
committed
BUG: make hashtable.unique support readonly arrays
This problem was brought up in pandas-dev#18773 and effectively comes down to how Cython deals with readonly arrays. While it would be ideal for Cython to fix the underlying problem, in the meantime we can rely on this workaround.
1 parent 6f886d7 commit 27bfa14

File tree

1 file changed

+54
-41
lines changed

1 file changed

+54
-41
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 54 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -255,10 +255,56 @@ dtypes = [('Float64', 'float64', 'val != val', True),
255255
('UInt64', 'uint64', 'False', False),
256256
('Int64', 'int64', 'val == iNaT', False)]
257257

258+
# get_dispatch(dtypes): generator consumed by the template's
# `{{for ... in get_dispatch(dtypes)}}` loop.
#
# For every (name, dtype, null_condition, float_group) entry in `dtypes` it
# renders the Cython source of the "unique" loop as text and yields the
# original four fields plus that rendered text as a fifth element,
# `unique_template`.
#
# The body is kept as a string so the template can paste the same loop under
# two signatures: `unique` (ndarray argument) and `unique_memview` (typed
# memoryview argument).
# NOTE(review): per the commit description this is a workaround for how
# Cython handles read-only arrays (pandas-dev#18773) -- presumably the
# memoryview signature rejects them; confirm against that issue.
def get_dispatch(dtypes):
259+
for (name, dtype, null_condition, float_group) in dtypes:
260+
unique_template = """\
261+
cdef:
262+
Py_ssize_t i, n = len(values)
263+
int ret = 0
264+
{dtype}_t val
265+
khiter_t k
266+
bint seen_na = 0
267+
{name}Vector uniques = {name}Vector()
268+
{name}VectorData *ud
269+
270+
ud = uniques.data
271+
272+
with nogil:
273+
for i in range(n):
274+
val = values[i]
275+
IF {float_group}:
276+
if val == val:
277+
k = kh_get_{dtype}(self.table, val)
278+
if k == self.table.n_buckets:
279+
kh_put_{dtype}(self.table, val, &ret)
280+
if needs_resize(ud):
281+
with gil:
282+
uniques.resize()
283+
append_data_{dtype}(ud, val)
284+
elif not seen_na:
285+
seen_na = 1
286+
if needs_resize(ud):
287+
with gil:
288+
uniques.resize()
289+
append_data_{dtype}(ud, NAN)
290+
ELSE:
291+
k = kh_get_{dtype}(self.table, val)
292+
if k == self.table.n_buckets:
293+
kh_put_{dtype}(self.table, val, &ret)
294+
if needs_resize(ud):
295+
with gil:
296+
uniques.resize()
297+
append_data_{dtype}(ud, val)
298+
return uniques.to_array()
299+
"""
300+
301+
# Fill in the per-dtype placeholders. `float_group` lands in the uppercase
# `IF ... ELSE` above (Cython's compile-time conditional), selecting the
# NaN-aware branch or the plain branch for this dtype.
# `null_condition` is passed to format() but the template text contains no
# {null_condition} placeholder, so it has no effect on the rendered body.
unique_template = unique_template.format(name=name, dtype=dtype, null_condition=null_condition, float_group=float_group)
302+
303+
# Yield the original fields unchanged plus the rendered body.
yield (name, dtype, null_condition, float_group, unique_template)
258304
}}
259305

260306

261-
{{for name, dtype, null_condition, float_group in dtypes}}
307+
{{for name, dtype, null_condition, float_group, unique_template in get_dispatch(dtypes)}}
262308

263309
cdef class {{name}}HashTable(HashTable):
264310

@@ -450,48 +496,15 @@ cdef class {{name}}HashTable(HashTable):
450496
return np.asarray(labels), arr_uniques
451497

452498
@cython.boundscheck(False)
453-
def unique(self, {{dtype}}_t[:] values):
454-
cdef:
455-
Py_ssize_t i, n = len(values)
456-
int ret = 0
457-
{{dtype}}_t val
458-
khiter_t k
459-
bint seen_na = 0
460-
{{name}}Vector uniques = {{name}}Vector()
461-
{{name}}VectorData *ud
499+
def unique(self, ndarray[{{dtype}}_t, ndim=1] values):
500+
if values.flags.writeable:
501+
return self.unique_memview(values)
462502

463-
ud = uniques.data
503+
{{unique_template}}
464504

465-
with nogil:
466-
for i in range(n):
467-
val = values[i]
468-
469-
{{if float_group}}
470-
if val == val:
471-
k = kh_get_{{dtype}}(self.table, val)
472-
if k == self.table.n_buckets:
473-
kh_put_{{dtype}}(self.table, val, &ret)
474-
if needs_resize(ud):
475-
with gil:
476-
uniques.resize()
477-
append_data_{{dtype}}(ud, val)
478-
elif not seen_na:
479-
seen_na = 1
480-
if needs_resize(ud):
481-
with gil:
482-
uniques.resize()
483-
append_data_{{dtype}}(ud, NAN)
484-
{{else}}
485-
k = kh_get_{{dtype}}(self.table, val)
486-
if k == self.table.n_buckets:
487-
kh_put_{{dtype}}(self.table, val, &ret)
488-
if needs_resize(ud):
489-
with gil:
490-
uniques.resize()
491-
append_data_{{dtype}}(ud, val)
492-
{{endif}}
493-
494-
return uniques.to_array()
505+
@cython.boundscheck(False)
506+
def unique_memview(self, {{dtype}}_t[:] values):
507+
{{unique_template}}
495508

496509
{{endfor}}
497510

0 commit comments

Comments
 (0)