From 61461ddcce754ec41830ce1269fb1b8fc62dd502 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 22 Dec 2019 09:04:58 -0800 Subject: [PATCH 1/4] move away from template --- pandas/_libs/algos_take_helper.pxi.in | 82 ++++++++++++++++++--------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 9dbae8170cbd0..0ba10d27be074 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -95,28 +95,6 @@ def get_dispatch(dtypes): out[i, j] = %(preval)svalues[idx, j]%(postval)s """ - inner_take_2d_axis1_template = """\ - cdef: - Py_ssize_t i, j, k, n, idx - %(c_type_out)s fv - - n = len(values) - k = len(indexer) - - if n == 0 or k == 0: - return - - fv = fill_value - - for i in range(n): - for j in range(k): - idx = indexer[j] - if idx == -1: - out[i, j] = fv - else: - out[i, j] = %(preval)svalues[i, idx]%(postval)s -""" - for (c_type_in, c_type_out, preval, postval) in dtypes: can_copy = c_type_in == c_type_out != "object" @@ -128,6 +106,13 @@ def get_dispatch(dtypes): nogil_str = '' tab = '' + if c_type_in == "uint8_t" and c_type_out == "object": + assert preval == 'True if ' + assert postval == ' > 0 else False' + else: + assert preval == "" + assert postval == "" + def get_name(dtype_name): if dtype_name == "object": return "object" @@ -144,16 +129,15 @@ def get_dispatch(dtypes): inner_take_1d = inner_take_1d_template % args inner_take_2d_axis0 = inner_take_2d_axis0_template % args - inner_take_2d_axis1 = inner_take_2d_axis1_template % args yield (name, dest, c_type_in, c_type_out, preval, postval, - inner_take_1d, inner_take_2d_axis0, inner_take_2d_axis1) + inner_take_1d, inner_take_2d_axis0) }} {{for name, dest, c_type_in, c_type_out, preval, postval, - inner_take_1d, inner_take_2d_axis0, inner_take_2d_axis1 + inner_take_1d, inner_take_2d_axis0 in get_dispatch(dtypes)}} @@ -220,7 +204,29 @@ cdef inline
take_2d_axis1_{{name}}_{{dest}}_memview({{c_type_in}}[:, :] values, const int64_t[:] indexer, {{c_type_out}}[:, :] out, fill_value=np.nan): -{{inner_take_2d_axis1}} + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(values) + k = len(indexer) + + if n == 0 or k == 0: + return + + fv = fill_value + + for i in range(n): + for j in range(k): + idx = indexer[j] + if idx == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[i, idx] > 0 else False + {{else}} + out[i, j] = values[i, idx] + {{endif}} @cython.wraparound(False) @@ -239,7 +245,29 @@ def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, # We cannot use the memoryview version on readonly-buffers due to # a limitation of Cython's typed memoryviews. Instead we can use # the slightly slower Cython ndarray type directly. -{{inner_take_2d_axis1}} + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(values) + k = len(indexer) + + if n == 0 or k == 0: + return + + fv = fill_value + + for i in range(n): + for j in range(k): + idx = indexer[j] + if idx == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[i, idx] > 0 else False + {{else}} + out[i, j] = values[i, idx] + {{endif}} @cython.wraparound(False) From 5ddab4f8305657aad5a9935a55f14e866e8e9c79 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 22 Dec 2019 13:02:36 -0800 Subject: [PATCH 2/4] Move away from double-templating --- pandas/_libs/algos_take_helper.pxi.in | 270 +++++++++++++++----------- 1 file changed, 159 insertions(+), 111 deletions(-) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 0ba10d27be074..82f012e19b615 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -10,108 +10,33 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in {{py: -# c_type_in, 
c_type_out, preval, postval +# c_type_in, c_type_out dtypes = [ - ('uint8_t', 'uint8_t', '', ''), - ('uint8_t', 'object', 'True if ', ' > 0 else False'), - ('int8_t', 'int8_t', '', ''), - ('int8_t', 'int32_t', '', ''), - ('int8_t', 'int64_t', '', ''), - ('int8_t', 'float64_t', '', ''), - ('int16_t', 'int16_t', '', ''), - ('int16_t', 'int32_t', '', ''), - ('int16_t', 'int64_t', '', ''), - ('int16_t', 'float64_t', '', ''), - ('int32_t', 'int32_t', '', ''), - ('int32_t', 'int64_t', '', ''), - ('int32_t', 'float64_t', '', ''), - ('int64_t', 'int64_t', '', ''), - ('int64_t', 'float64_t', '', ''), - ('float32_t', 'float32_t', '', ''), - ('float32_t', 'float64_t', '', ''), - ('float64_t', 'float64_t', '', ''), - ('object', 'object', '', ''), + ('uint8_t', 'uint8_t'), + ('uint8_t', 'object'), + ('int8_t', 'int8_t'), + ('int8_t', 'int32_t'), + ('int8_t', 'int64_t'), + ('int8_t', 'float64_t'), + ('int16_t', 'int16_t'), + ('int16_t', 'int32_t'), + ('int16_t', 'int64_t'), + ('int16_t', 'float64_t'), + ('int32_t', 'int32_t'), + ('int32_t', 'int64_t'), + ('int32_t', 'float64_t'), + ('int64_t', 'int64_t'), + ('int64_t', 'float64_t'), + ('float32_t', 'float32_t'), + ('float32_t', 'float64_t'), + ('float64_t', 'float64_t'), + ('object', 'object'), ] def get_dispatch(dtypes): - inner_take_1d_template = """ - cdef: - Py_ssize_t i, n, idx - %(c_type_out)s fv - - n = indexer.shape[0] - - fv = fill_value - - %(nogil_str)s - %(tab)sfor i in range(n): - %(tab)s idx = indexer[i] - %(tab)s if idx == -1: - %(tab)s out[i] = fv - %(tab)s else: - %(tab)s out[i] = %(preval)svalues[idx]%(postval)s -""" - - inner_take_2d_axis0_template = """\ - cdef: - Py_ssize_t i, j, k, n, idx - %(c_type_out)s fv - - n = len(indexer) - k = values.shape[1] - - fv = fill_value - - IF %(can_copy)s: - cdef: - %(c_type_out)s *v - %(c_type_out)s *o - - #GH3130 - if (values.strides[1] == out.strides[1] and - values.strides[1] == sizeof(%(c_type_out)s) and - sizeof(%(c_type_out)s) * n >= 256): - - for i in range(n): - 
idx = indexer[i] - if idx == -1: - for j in range(k): - out[i, j] = fv - else: - v = &values[idx, 0] - o = &out[i, 0] - memmove(o, v, (sizeof(%(c_type_out)s) * k)) - return - - for i in range(n): - idx = indexer[i] - if idx == -1: - for j in range(k): - out[i, j] = fv - else: - for j in range(k): - out[i, j] = %(preval)svalues[idx, j]%(postval)s -""" - - for (c_type_in, c_type_out, preval, postval) in dtypes: - - can_copy = c_type_in == c_type_out != "object" - nogil = c_type_out != "object" - if nogil: - nogil_str = "with nogil:" - tab = ' ' - else: - nogil_str = '' - tab = '' - - if c_type_in == "uint8_t" and c_type_out == "object": - assert preval == 'True if ' - assert postval == ' > 0 else False' - else: - assert preval == "" - assert postval == "" + for (c_type_in, c_type_out) in dtypes: def get_name(dtype_name): if dtype_name == "object": @@ -124,21 +49,14 @@ def get_dispatch(dtypes): dest = get_name(c_type_out) args = dict(name=name, dest=dest, c_type_in=c_type_in, - c_type_out=c_type_out, preval=preval, postval=postval, - can_copy=can_copy, nogil_str=nogil_str, tab=tab) + c_type_out=c_type_out) - inner_take_1d = inner_take_1d_template % args - inner_take_2d_axis0 = inner_take_2d_axis0_template % args - - yield (name, dest, c_type_in, c_type_out, preval, postval, - inner_take_1d, inner_take_2d_axis0) + yield (name, dest, c_type_in, c_type_out) }} -{{for name, dest, c_type_in, c_type_out, preval, postval, - inner_take_1d, inner_take_2d_axis0 - in get_dispatch(dtypes)}} +{{for name, dest, c_type_in, c_type_out in get_dispatch(dtypes)}} @cython.wraparound(False) @@ -148,8 +66,29 @@ cdef inline take_1d_{{name}}_{{dest}}_memview({{c_type_in}}[:] values, {{c_type_out}}[:] out, fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + {{c_type_out}} fv + + n = indexer.shape[0] + + fv = fill_value -{{inner_take_1d}} + {{if c_type_out != "object"}} + with nogil: + {{else}} + if True: + {{endif}} + for i in range(n): + idx = indexer[i] + if idx == -1: + out[i] = fv + 
else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i] = True if values[idx] > 0 else False + {{else}} + out[i] = values[idx] + {{endif}} @cython.wraparound(False) @@ -168,7 +107,30 @@ def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, # We cannot use the memoryview version on readonly-buffers due to # a limitation of Cython's typed memoryviews. Instead we can use # the slightly slower Cython ndarray type directly. -{{inner_take_1d}} + cdef: + Py_ssize_t i, n, idx + {{c_type_out}} fv + + n = indexer.shape[0] + + fv = fill_value + + {{if c_type_out != "object"}} + with nogil: + {{else}} + if True: + {{endif}} + for i in range(n): + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i] = True if values[idx] > 0 else False + {{else}} + out[i] = values[idx] + {{endif}} + @cython.wraparound(False) @@ -177,7 +139,48 @@ cdef inline take_2d_axis0_{{name}}_{{dest}}_memview({{c_type_in}}[:, :] values, const int64_t[:] indexer, {{c_type_out}}[:, :] out, fill_value=np.nan): -{{inner_take_2d_axis0}} + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(indexer) + k = values.shape[1] + + fv = fill_value + + IF {{True if c_type_in == c_type_out != "object" else False}}: + cdef: + {{c_type_out}} *v + {{c_type_out}} *o + + # GH#3130 + if (values.strides[1] == out.strides[1] and + values.strides[1] == sizeof({{c_type_out}}) and + sizeof({{c_type_out}}) * n >= 256): + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + v = &values[idx, 0] + o = &out[i, 0] + memmove(o, v, (sizeof({{c_type_out}}) * k)) + return + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, j] > 0 else False + {{else}} + out[i, j] = values[idx, j] + {{endif}} 
@cython.wraparound(False) @@ -195,7 +198,48 @@ def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, # We cannot use the memoryview version on readonly-buffers due to # a limitation of Cython's typed memoryviews. Instead we can use # the slightly slower Cython ndarray type directly. -{{inner_take_2d_axis0}} + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(indexer) + k = values.shape[1] + + fv = fill_value + + IF {{True if c_type_in == c_type_out != "object" else False}}: + cdef: + {{c_type_out}} *v + {{c_type_out}} *o + + # GH#3130 + if (values.strides[1] == out.strides[1] and + values.strides[1] == sizeof({{c_type_out}}) and + sizeof({{c_type_out}}) * n >= 256): + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + v = &values[idx, 0] + o = &out[i, 0] + memmove(o, v, (sizeof({{c_type_out}}) * k)) + return + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, j] > 0 else False + {{else}} + out[i, j] = values[idx, j] + {{endif}} @cython.wraparound(False) @@ -296,7 +340,11 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, if idx1[j] == -1: out[i, j] = fv else: - out[i, j] = {{preval}}values[idx, idx1[j]]{{postval}} + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, idx1[j]] > 0 else False + {{else}} + out[i, j] = values[idx, idx1[j]] + {{endif}} {{endfor}} From 5c35c7640db4a267295f12193e2176444a0e592a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 22 Dec 2019 14:57:01 -0800 Subject: [PATCH 3/4] typo fixup --- pandas/_libs/algos_take_helper.pxi.in | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 82f012e19b615..a5232e9b04f53 100644 --- 
a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -82,7 +82,7 @@ cdef inline take_1d_{{name}}_{{dest}}_memview({{c_type_in}}[:] values, for i in range(n): idx = indexer[i] if idx == -1: - out[i] = fv + out[i] = fv else: {{if c_type_in == "uint8_t" and c_type_out == "object"}} out[i] = True if values[idx] > 0 else False @@ -123,7 +123,7 @@ def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, for i in range(n): idx = indexer[i] if idx == -1: - out[i] = fv + out[i] = fv else: {{if c_type_in == "uint8_t" and c_type_out == "object"}} out[i] = True if values[idx] > 0 else False @@ -132,7 +132,6 @@ def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, {{endif}} - @cython.wraparound(False) @cython.boundscheck(False) cdef inline take_2d_axis0_{{name}}_{{dest}}_memview({{c_type_in}}[:, :] values, From eb3b7939a38142ffbbc3b3e3a4545868832979e0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Dec 2019 10:43:19 -0800 Subject: [PATCH 4/4] modernize usage in take_helper --- pandas/_libs/algos_take_helper.pxi.in | 152 ++------------------------ 1 file changed, 12 insertions(+), 140 deletions(-) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index a5232e9b04f53..420e08a3d68d4 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -61,52 +61,15 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -cdef inline take_1d_{{name}}_{{dest}}_memview({{c_type_in}}[:] values, - const int64_t[:] indexer, - {{c_type_out}}[:] out, - fill_value=np.nan): - - cdef: - Py_ssize_t i, n, idx - {{c_type_out}} fv - - n = indexer.shape[0] - - fv = fill_value - - {{if c_type_out != "object"}} - with nogil: - {{else}} - if True: - {{endif}} - for i in range(n): - idx = indexer[i] - if idx == -1: - out[i] = fv - else: - {{if c_type_in == "uint8_t" and c_type_out == "object"}} - out[i] = True if values[idx] > 0 else 
False - {{else}} - out[i] = values[idx] - {{endif}} - - -@cython.wraparound(False) -@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values, +{{else}} def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, +{{endif}} const int64_t[:] indexer, {{c_type_out}}[:] out, fill_value=np.nan): - if values.flags.writeable: - # We can call the memoryview version of the code - take_1d_{{name}}_{{dest}}_memview(values, indexer, out, - fill_value=fill_value) - return - - # We cannot use the memoryview version on readonly-buffers due to - # a limitation of Cython's typed memoryviews. Instead we can use - # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx {{c_type_out}} fv @@ -134,69 +97,14 @@ def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, @cython.wraparound(False) @cython.boundscheck(False) -cdef inline take_2d_axis0_{{name}}_{{dest}}_memview({{c_type_in}}[:, :] values, - const int64_t[:] indexer, - {{c_type_out}}[:, :] out, - fill_value=np.nan): - cdef: - Py_ssize_t i, j, k, n, idx - {{c_type_out}} fv - - n = len(indexer) - k = values.shape[1] - - fv = fill_value - - IF {{True if c_type_in == c_type_out != "object" else False}}: - cdef: - {{c_type_out}} *v - {{c_type_out}} *o - - # GH#3130 - if (values.strides[1] == out.strides[1] and - values.strides[1] == sizeof({{c_type_out}}) and - sizeof({{c_type_out}}) * n >= 256): - - for i in range(n): - idx = indexer[i] - if idx == -1: - for j in range(k): - out[i, j] = fv - else: - v = &values[idx, 0] - o = &out[i, 0] - memmove(o, v, (sizeof({{c_type_out}}) * k)) - return - - for i in range(n): - idx = indexer[i] - if idx == -1: - for j in range(k): - out[i, j] = fv - else: - for j in range(k): - {{if c_type_in == "uint8_t" and c_type_out == "object"}} - out[i, j] = True if values[idx, j] > 0 else False - {{else}} - out[i, j] = values[idx, j] - {{endif}} - - -@cython.wraparound(False) -@cython.boundscheck(False) 
+{{if c_type_in != "object"}} +def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} ndarray[int64_t] indexer, {{c_type_out}}[:, :] out, fill_value=np.nan): - if values.flags.writeable: - # We can call the memoryview version of the code - take_2d_axis0_{{name}}_{{dest}}_memview(values, indexer, out, - fill_value=fill_value) - return - - # We cannot use the memoryview version on readonly-buffers due to - # a limitation of Cython's typed memoryviews. Instead we can use - # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, j, k, n, idx {{c_type_out}} fv @@ -243,51 +151,15 @@ def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, @cython.wraparound(False) @cython.boundscheck(False) -cdef inline take_2d_axis1_{{name}}_{{dest}}_memview({{c_type_in}}[:, :] values, - const int64_t[:] indexer, - {{c_type_out}}[:, :] out, - fill_value=np.nan): - cdef: - Py_ssize_t i, j, k, n, idx - {{c_type_out}} fv - - n = len(values) - k = len(indexer) - - if n == 0 or k == 0: - return - - fv = fill_value - - for i in range(n): - for j in range(k): - idx = indexer[j] - if idx == -1: - out[i, j] = fv - else: - {{if c_type_in == "uint8_t" and c_type_out == "object"}} - out[i, j] = True if values[i, idx] > 0 else False - {{else}} - out[i, j] = values[i, idx] - {{endif}} - - -@cython.wraparound(False) -@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} ndarray[int64_t] indexer, {{c_type_out}}[:, :] out, fill_value=np.nan): - if values.flags.writeable: - # We can call the memoryview version of the code - take_2d_axis1_{{name}}_{{dest}}_memview(values, indexer, out, - fill_value=fill_value) - return - - # We cannot use the memoryview version on readonly-buffers due to - # a 
limitation of Cython's typed memoryviews. Instead we can use - # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, j, k, n, idx {{c_type_out}} fv