Skip to content

REF: Make Util functions accessible from C #50425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ from numpy cimport (
cnp.import_array()

cimport pandas._libs.util as util


cdef extern from "pandas/type.h":
bint is_integer_object(object obj)
bint is_array(object obj)

from pandas._libs.dtypes cimport (
numeric_object_t,
numeric_t,
Expand Down Expand Up @@ -513,7 +519,7 @@ def validate_limit(nobs: int | None, limit=None) -> int:
if limit is None:
lim = nobs
else:
if not util.is_integer_object(limit):
if not is_integer_object(limit):
raise ValueError("Limit must be an integer")
if limit < 1:
raise ValueError("Limit must be greater than 0")
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
def ensure_platform_int(object arr):
# GH3033, GH1392
# platform int is the size of the int pointer, e.g. np.intp
if util.is_array(arr):
if is_array(arr):
if (<ndarray>arr).descr.type_num == cnp.NPY_INTP:
return arr
else:
Expand All @@ -23,7 +23,7 @@ def ensure_platform_int(object arr):


def ensure_object(object arr):
if util.is_array(arr):
if is_array(arr):
if (<ndarray>arr).descr.type_num == NPY_OBJECT:
return arr
else:
Expand Down Expand Up @@ -61,7 +61,7 @@ def get_dispatch(dtypes):


def ensure_{{name}}(object arr, copy=True):
if util.is_array(arr):
if is_array(arr):
if (<ndarray>arr).descr.type_num == NPY_{{c_type}}:
return arr
else:
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ from numpy cimport (

import_array()

from pandas._libs.util cimport is_nan
cdef extern from "pandas/type.h":
bint is_nan(object obj)
bint is_datetime64_object(object obj)
bint is_integer_object(object obj)


@cython.boundscheck(False)
Expand Down
16 changes: 11 additions & 5 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@ from numpy cimport (
cnp.import_array()


from pandas._libs cimport util
cdef extern from "pandas/type.h":
bint is_integer_object(object obj)
bint is_float_object(object obj)
bint is_bool_object(object obj)
bint is_complex_object(object obj)
bint is_nan(object obj)

from pandas._libs.hashtable cimport HashTable
from pandas._libs.tslibs.nattype cimport c_NaT as NaT
from pandas._libs.tslibs.np_datetime cimport (
Expand Down Expand Up @@ -74,7 +80,7 @@ cdef ndarray _get_bool_indexer(ndarray values, object val):
indexer[i] = is_matching_na(item, val)

else:
if util.is_nan(val):
if is_nan(val):
indexer = np.isnan(values)
else:
indexer = values == val
Expand Down Expand Up @@ -836,7 +842,7 @@ include "index_class_helper.pxi"

cdef class BoolEngine(UInt8Engine):
cdef _check_type(self, object val):
if not util.is_bool_object(val):
if not is_bool_object(val):
raise KeyError(val)
return <uint8_t>val

Expand Down Expand Up @@ -994,7 +1000,7 @@ cdef class SharedEngine:
except KeyError:
loc = -1
else:
assert util.is_integer_object(loc), (loc, val)
assert is_integer_object(loc), (loc, val)
res[i] = loc

return res
Expand Down Expand Up @@ -1032,7 +1038,7 @@ cdef class SharedEngine:
if isinstance(locs, slice):
# Only needed for get_indexer_non_unique
locs = np.arange(locs.start, locs.stop, locs.step, dtype=np.intp)
elif util.is_integer_object(locs):
elif is_integer_object(locs):
locs = np.array([locs], dtype=np.intp)
else:
assert locs.dtype.kind == "b"
Expand Down
12 changes: 6 additions & 6 deletions pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ cdef class {{name}}Engine(IndexEngine):

cdef _check_type(self, object val):
{{if name not in {'Float64', 'Float32', 'Complex64', 'Complex128'} }}
if not util.is_integer_object(val):
if util.is_float_object(val):
if not is_integer_object(val):
if is_float_object(val):
# Make sure Int64Index.get_loc(2.0) works
if val.is_integer():
return int(val)
Expand All @@ -48,13 +48,13 @@ cdef class {{name}}Engine(IndexEngine):
raise KeyError(val)
{{endif}}
{{elif name not in {'Complex64', 'Complex128'} }}
if not util.is_integer_object(val) and not util.is_float_object(val):
if not is_integer_object(val) and not is_float_object(val):
# in particular catch bool and avoid casting True -> 1.0
raise KeyError(val)
{{else}}
if (not util.is_integer_object(val)
and not util.is_float_object(val)
and not util.is_complex_object(val)
if (not is_integer_object(val)
and not is_float_object(val)
and not is_complex_object(val)
):
# in particular catch bool and avoid casting True -> 1.0
raise KeyError(val)
Expand Down
11 changes: 8 additions & 3 deletions pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ from numpy cimport (
cnp.import_array()


from pandas._libs cimport util
from pandas._libs.hashtable cimport Int64Vector
from pandas._libs.tslibs.timedeltas cimport _Timedelta
from pandas._libs.tslibs.timestamps cimport _Timestamp
Expand All @@ -42,6 +41,12 @@ from pandas._libs.tslibs.util cimport (
is_timedelta64_object,
)


cdef extern from "pandas/type.h":
bint is_array(object obj)
bint is_nan(object obj)


VALID_CLOSED = frozenset(["left", "right", "both", "neither"])


Expand Down Expand Up @@ -360,7 +365,7 @@ cdef class Interval(IntervalMixin):
self_tuple = (self.left, self.right, self.closed)
other_tuple = (other.left, other.right, other.closed)
return PyObject_RichCompare(self_tuple, other_tuple, op)
elif util.is_array(other):
elif is_array(other):
return np.array(
[PyObject_RichCompare(self, x, op) for x in other],
dtype=bool,
Expand Down Expand Up @@ -551,7 +556,7 @@ def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):

for i in range(n):
interval = intervals[i]
if interval is None or util.is_nan(interval):
if interval is None or is_nan(interval):
left[i] = np.nan
right[i] = np.nan
continue
Expand Down
11 changes: 11 additions & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,17 @@ cdef extern from "src/parse_helper.h":
int floatify(object, float64_t *result, int *maybe_int) except -1

from pandas._libs cimport util


cdef extern from "pandas/type.h":
bint is_integer_object(object obj)
bint is_float_object(object obj)
bint is_bool_object(object obj)
bint is_datetime64_object(object obj)
bint is_timedelta64_object(object obj)
bint is_array(object obj)


from pandas._libs.util cimport (
INT64_MAX,
INT64_MIN,
Expand Down
61 changes: 36 additions & 25 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ from sys import maxsize

cimport cython
from cython cimport Py_ssize_t

import numpy as np

cimport numpy as cnp
Expand All @@ -17,6 +18,16 @@ from numpy cimport (
cnp.import_array()

from pandas._libs cimport util


cdef extern from "pandas/type.h":
bint is_timedelta64_object(object obj)
bint is_float_object(object obj)
bint is_complex_object(object obj)
bint is_datetime64_object(object obj)
bint is_array(object obj)
bint is_nan(object obj)

from pandas._libs.tslibs.nattype cimport (
c_NaT as NaT,
checknull_with_nat,
Expand Down Expand Up @@ -89,38 +100,38 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False
bool
"""
if left is None:
if nan_matches_none and util.is_nan(right):
if nan_matches_none and is_nan(right):
return True
return right is None
elif left is C_NA:
return right is C_NA
elif left is NaT:
return right is NaT
elif util.is_float_object(left):
if nan_matches_none and right is None and util.is_nan(left):
elif is_float_object(left):
if nan_matches_none and right is None and is_nan(left):
return True
return (
util.is_nan(left)
and util.is_float_object(right)
and util.is_nan(right)
is_nan(left)
and is_float_object(right)
and is_nan(right)
)
elif util.is_complex_object(left):
elif is_complex_object(left):
return (
util.is_nan(left)
and util.is_complex_object(right)
and util.is_nan(right)
is_nan(left)
and is_complex_object(right)
and is_nan(right)
)
elif util.is_datetime64_object(left):
elif is_datetime64_object(left):
return (
get_datetime64_value(left) == NPY_NAT
and util.is_datetime64_object(right)
and is_datetime64_object(right)
and get_datetime64_value(right) == NPY_NAT
and get_datetime64_unit(left) == get_datetime64_unit(right)
)
elif util.is_timedelta64_object(left):
elif is_timedelta64_object(left):
return (
get_timedelta64_value(left) == NPY_NAT
and util.is_timedelta64_object(right)
and is_timedelta64_object(right)
and get_timedelta64_value(right) == NPY_NAT
and get_datetime64_unit(left) == get_datetime64_unit(right)
)
Expand Down Expand Up @@ -153,15 +164,15 @@ cpdef bint checknull(object val, bint inf_as_na=False):
"""
if val is None or val is NaT or val is C_NA:
return True
elif util.is_float_object(val) or util.is_complex_object(val):
elif is_float_object(val) or is_complex_object(val):
if val != val:
return True
elif inf_as_na:
return val == INF or val == NEGINF
return False
elif util.is_timedelta64_object(val):
elif is_timedelta64_object(val):
return get_timedelta64_value(val) == NPY_NAT
elif util.is_datetime64_object(val):
elif is_datetime64_object(val):
return get_datetime64_value(val) == NPY_NAT
else:
return is_decimal_na(val)
Expand Down Expand Up @@ -251,11 +262,11 @@ def isnaobj2d(arr: ndarray, inf_as_na: bool = False) -> ndarray:


def isposinf_scalar(val: object) -> bool:
return util.is_float_object(val) and val == INF
return is_float_object(val) and val == INF


def isneginf_scalar(val: object) -> bool:
return util.is_float_object(val) and val == NEGINF
return is_float_object(val) and val == NEGINF


cdef bint is_null_datetime64(v):
Expand Down Expand Up @@ -299,7 +310,7 @@ def is_float_nan(values: ndarray) -> ndarray:

for i in range(N):
val = values[i]
if util.is_nan(val):
if is_nan(val):
result[i] = True
return result.view(bool)

Expand Down Expand Up @@ -327,7 +338,7 @@ def is_numeric_na(values: ndarray) -> ndarray:
for i in range(N):
val = values[i]
if checknull(val):
if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val):
if val is None or val is C_NA or is_nan(val) or is_decimal_na(val):
result[i] = True
else:
raise TypeError(f"'values' contains non-numeric NA {val}")
Expand All @@ -343,7 +354,7 @@ def _create_binary_propagating_op(name, is_divmod=False):
def method(self, other):
if (other is C_NA or isinstance(other, (str, bytes))
or isinstance(other, (numbers.Number, np.bool_))
or util.is_array(other) and not other.shape):
or is_array(other) and not other.shape):
# Need the other.shape clause to handle NumPy scalars,
# since we do a setitem on `out` below, which
# won't work for NumPy scalars.
Expand All @@ -352,7 +363,7 @@ def _create_binary_propagating_op(name, is_divmod=False):
else:
return NA

elif util.is_array(other):
elif is_array(other):
out = np.empty(other.shape, dtype=object)
out[:] = NA

Expand Down Expand Up @@ -464,7 +475,7 @@ class NAType(C_NAType):
return type(other)(1)
else:
return NA
elif util.is_array(other):
elif is_array(other):
return np.where(other == 0, other.dtype.type(1), NA)

return NotImplemented
Expand All @@ -477,7 +488,7 @@ class NAType(C_NAType):
return other
else:
return NA
elif util.is_array(other):
elif is_array(other):
return np.where(other == 1, other, NA)
return NotImplemented

Expand Down
Loading