Skip to content

Assorted _libs cleanups #22235

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits on Aug 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
Py_ssize_t i, j, k

k = len(values)
for j from 0 <= j < k:
for j in range(k):
i = indexer[j]
out[i] = values[j, loc]

Expand Down
22 changes: 11 additions & 11 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def get_dispatch(dtypes):
fv = fill_value

%(nogil_str)s
%(tab)sfor i from 0 <= i < n:
%(tab)sfor i in range(n):
%(tab)s idx = indexer[i]
%(tab)s if idx == -1:
%(tab)s out[i] = fv
Expand Down Expand Up @@ -74,24 +74,24 @@ def get_dispatch(dtypes):
values.strides[1] == sizeof(%(c_type_out)s) and
sizeof(%(c_type_out)s) * n >= 256):

for i from 0 <= i < n:
for i in range(n):
idx = indexer[i]
if idx == -1:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = fv
else:
v = &values[idx, 0]
o = &out[i, 0]
memmove(o, v, <size_t>(sizeof(%(c_type_out)s) * k))
return

for i from 0 <= i < n:
for i in range(n):
idx = indexer[i]
if idx == -1:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = fv
else:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = %(preval)svalues[idx, j]%(postval)s
"""

Expand All @@ -108,8 +108,8 @@ def get_dispatch(dtypes):

fv = fill_value

for i from 0 <= i < n:
for j from 0 <= j < k:
for i in range(n):
for j in range(k):
idx = indexer[j]
if idx == -1:
out[i, j] = fv
Expand Down Expand Up @@ -246,13 +246,13 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
k = len(idx1)

fv = fill_value
for i from 0 <= i < n:
for i in range(n):
idx = idx0[i]
if idx == -1:
for j from 0 <= j < k:
for j in range(k):
out[i, j] = fv
else:
for j from 0 <= j < k:
for j in range(k):
if idx1[j] == -1:
out[i, j] = fv
else:
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -161,18 +161,18 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
{{endif}}
elif keep == 'first':
{{if dtype == 'object'}}
for i from 0 <= i < n:
for i in range(n):
kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
out[i] = ret == 0
{{else}}
with nogil:
for i from 0 <= i < n:
for i in range(n):
kh_put_{{ttype}}(table, values[i], &ret)
out[i] = ret == 0
{{endif}}
else:
{{if dtype == 'object'}}
for i from 0 <= i < n:
for i in range(n):
value = values[i]
k = kh_get_{{ttype}}(table, <PyObject*> value)
if k != table.n_buckets:
Expand All @@ -185,7 +185,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
out[i] = 0
{{else}}
with nogil:
for i from 0 <= i < n:
for i in range(n):
value = values[i]
k = kh_get_{{ttype}}(table, value)
if k != table.n_buckets:
Expand Down
23 changes: 12 additions & 11 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -78,29 +78,30 @@ cdef bint PY2 = sys.version_info[0] == 2
cdef double nan = <double>np.NaN


def values_from_object(object o):
def values_from_object(object obj):
""" return my values or the object if we are say an ndarray """
cdef f
cdef func # TODO: Does declaring this without a type accomplish anything?

f = getattr(o, 'get_values', None)
if f is not None:
o = f()
func = getattr(obj, 'get_values', None)
if func is not None:
obj = func()

return o
return obj


@cython.wraparound(False)
@cython.boundscheck(False)
def memory_usage_of_objects(ndarray[object, ndim=1] arr):
def memory_usage_of_objects(object[:] arr):
""" return the memory usage of an object array in bytes,
does not include the actual bytes of the pointers """
cdef Py_ssize_t i, n
cdef int64_t s = 0
cdef:
Py_ssize_t i, n
int64_t size = 0

n = len(arr)
for i in range(n):
s += arr[i].__sizeof__()
return s
size += arr[i].__sizeof__()
return size


# ----------------------------------------------------------------------
Expand Down
35 changes: 17 additions & 18 deletions pandas/_libs/ops.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
# cython: profile=False
import operator

from cpython cimport (PyFloat_Check, PyBool_Check,
Expand All @@ -21,7 +20,7 @@ from missing cimport checknull

@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_compare(ndarray[object] values, object val, object op):
def scalar_compare(object[:] values, object val, object op):
"""
Compare each element of `values` array with the scalar `val`, with
the comparison operation described by `op`.
Expand Down Expand Up @@ -73,7 +72,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
else:
try:
result[i] = PyObject_RichCompareBool(x, val, flag)
except (TypeError):
except TypeError:
result[i] = True
elif flag == Py_EQ:
for i in range(n):
Expand All @@ -85,7 +84,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
else:
try:
result[i] = PyObject_RichCompareBool(x, val, flag)
except (TypeError):
except TypeError:
result[i] = False

else:
Expand All @@ -103,7 +102,7 @@ def scalar_compare(ndarray[object] values, object val, object op):

@cython.wraparound(False)
@cython.boundscheck(False)
def vec_compare(ndarray[object] left, ndarray[object] right, object op):
def vec_compare(object[:] left, object[:] right, object op):
"""
Compare the elements of `left` with the elements of `right` pointwise,
with the comparison operation described by `op`.
Expand All @@ -126,8 +125,8 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):
int flag

if n != len(right):
raise ValueError('Arrays were different lengths: %d vs %d'
% (n, len(right)))
raise ValueError('Arrays were different lengths: {n} vs {nright}'
.format(n=n, nright=len(right)))

if op is operator.lt:
flag = Py_LT
Expand Down Expand Up @@ -170,7 +169,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):

@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_binop(ndarray[object] values, object val, object op):
def scalar_binop(object[:] values, object val, object op):
"""
Apply the given binary operator `op` between each element of the array
`values` and the scalar `val`.
Expand All @@ -187,13 +186,13 @@ def scalar_binop(ndarray[object] values, object val, object op):
"""
cdef:
Py_ssize_t i, n = len(values)
ndarray[object] result
object[:] result
object x

result = np.empty(n, dtype=object)
if val is None or is_nan(val):
result.fill(val)
return result
result[:] = val
return result.base # `.base` to access underlying np.ndarray

for i in range(n):
x = values[i]
Expand All @@ -202,12 +201,12 @@ def scalar_binop(ndarray[object] values, object val, object op):
else:
result[i] = op(x, val)

return maybe_convert_bool(result)
return maybe_convert_bool(result.base)


@cython.wraparound(False)
@cython.boundscheck(False)
def vec_binop(ndarray[object] left, ndarray[object] right, object op):
def vec_binop(object[:] left, object[:] right, object op):
"""
Apply the given binary operator `op` pointwise to the elements of
arrays `left` and `right`.
Expand All @@ -224,11 +223,11 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
"""
cdef:
Py_ssize_t i, n = len(left)
ndarray[object] result
object[:] result

if n != len(right):
raise ValueError('Arrays were different lengths: %d vs %d'
% (n, len(right)))
raise ValueError('Arrays were different lengths: {n} vs {nright}'
.format(n=n, nright=len(right)))

result = np.empty(n, dtype=object)

Expand All @@ -245,7 +244,7 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
else:
raise

return maybe_convert_bool(result)
return maybe_convert_bool(result.base) # `.base` to access np.ndarray


def maybe_convert_bool(ndarray[object] arr,
Expand All @@ -270,7 +269,7 @@ def maybe_convert_bool(ndarray[object] arr,
if false_values is not None:
false_vals = false_vals | set(false_values)

for i from 0 <= i < n:
for i in range(n):
val = arr[i]

if PyBool_Check(val):
Expand Down
25 changes: 13 additions & 12 deletions pandas/_libs/sparse.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ cdef class IntIndex(SparseIndex):
new_indices = np.empty(min(
len(xindices), len(yindices)), dtype=np.int32)

for xi from 0 <= xi < self.npoints:
for xi in range(self.npoints):
xind = xindices[xi]

while yi < y.npoints and yindices[yi] < xind:
Expand Down Expand Up @@ -292,7 +292,7 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices):

# TODO: two-pass algorithm faster?
prev = block = indices[0]
for i from 1 <= i < npoints:
for i in range(1, npoints):
cur = indices[i]
if cur - prev > 1:
# new block
Expand Down Expand Up @@ -383,21 +383,22 @@ cdef class BlockIndex(SparseIndex):
if len(blocs) != len(blengths):
raise ValueError('block bound arrays must be same length')

for i from 0 <= i < self.nblocks:
for i in range(self.nblocks):
if i > 0:
if blocs[i] <= blocs[i - 1]:
raise ValueError('Locations not in ascending order')

if i < self.nblocks - 1:
if blocs[i] + blengths[i] > blocs[i + 1]:
raise ValueError('Block %d overlaps' % i)
raise ValueError('Block {idx} overlaps'.format(idx=i))
else:
if blocs[i] + blengths[i] > self.length:
raise ValueError('Block %d extends beyond end' % i)
raise ValueError('Block {idx} extends beyond end'
.format(idx=i))

# no zero-length blocks
if blengths[i] == 0:
raise ValueError('Zero-length block %d' % i)
raise ValueError('Zero-length block {idx}'.format(idx=i))

def equals(self, other):
if not isinstance(other, BlockIndex):
Expand All @@ -422,10 +423,10 @@ cdef class BlockIndex(SparseIndex):

indices = np.empty(self.npoints, dtype=np.int32)

for b from 0 <= b < self.nblocks:
for b in range(self.nblocks):
offset = self.locbuf[b]

for j from 0 <= j < self.lenbuf[b]:
for j in range(self.lenbuf[b]):
indices[i] = offset + j
i += 1

Expand Down Expand Up @@ -551,7 +552,7 @@ cdef class BlockIndex(SparseIndex):
return -1

cum_len = 0
for i from 0 <= i < self.nblocks:
for i in range(self.nblocks):
if index >= locs[i] and index < locs[i] + lens[i]:
return cum_len + index - locs[i]
cum_len += lens[i]
Expand Down Expand Up @@ -579,11 +580,11 @@ cdef class BlockIndex(SparseIndex):
if self.npoints == 0:
return results

for i from 0 <= i < n:
for i in range(n):
ind_val = indexer[i]
if not (ind_val < 0 or self.length <= ind_val):
cum_len = 0
for j from 0 <= j < self.nblocks:
for j in range(self.nblocks):
if ind_val >= locs[j] and ind_val < locs[j] + lens[j]:
results[i] = cum_len + ind_val - locs[j]
cum_len += lens[j]
Expand Down Expand Up @@ -824,7 +825,7 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map):

# out = np.empty(length, dtype=np.float64)

# for i from 0 <= i < length:
# for i in range(length):
# if indexer[i] == -1:
# pass

Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/sparse_op_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_,
# Wow, what a hack job. Need to do something about this

# walk the two SparseVectors, adding matched locations...
for out_i from 0 <= out_i < out_index.npoints:
for out_i in range(out_index.npoints):
if yblock == yindex.nblocks:
# use y fill value
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
Expand Down Expand Up @@ -286,7 +286,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex,
out_indices = out_index.indices

# walk the two SparseVectors, adding matched locations...
for out_i from 0 <= out_i < out_index.npoints:
for out_i in range(out_index.npoints):
if xi == xindex.npoints:
# use x fill value
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/src/compat_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ The full license is in the LICENSE file, distributed with this software.
#define PANDAS__LIBS_SRC_COMPAT_HELPER_H_

#include "Python.h"
#include "helper.h"
#include "inline_helper.h"

/*
PySlice_GetIndicesEx changes signature in PY3
Expand Down
Loading