From baab33c83f0e3f3e992fb8a5ddd8d88366ebe979 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 23 Dec 2022 12:59:23 -0800 Subject: [PATCH 1/4] initial move from cython to C --- pandas/_libs/pandas/type.h | 159 ++++++++++++++++++++++++ pandas/_libs/tslibs/np_datetime.pxd | 2 +- pandas/_libs/tslibs/np_datetime.pyx | 4 +- pandas/_libs/tslibs/util.pxd | 185 ++-------------------------- setup.py | 1 + 5 files changed, 174 insertions(+), 177 deletions(-) create mode 100644 pandas/_libs/pandas/type.h diff --git a/pandas/_libs/pandas/type.h b/pandas/_libs/pandas/type.h new file mode 100644 index 0000000000000..a0841e7aa861a --- /dev/null +++ b/pandas/_libs/pandas/type.h @@ -0,0 +1,159 @@ +/* +Copyright (c) 2022-, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_PANDAS_TYPE_H_ +#define PANDAS__LIBS_PANDAS_TYPE_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#define PY_SSIZE_T_CLEAN +#include +#include + +/* +C equivalent of `isinstance(obj, np.timedelta64)` + +Parameters +---------- +obj : object + +Returns +------- +is_timedelta64 : bool +*/ +int is_timedelta64_object(PyObject *obj) { + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type); +} + +/* +C equivalent of + +`isinstance(obj, (int, long, np.integer)) and not isinstance(obj, bool)` + +Parameters +---------- +obj : object + +Returns +------- +is_integer : bool + +Notes +----- +This does not count np.timedelta64 objects as integers. 
+*/ +int is_integer_object(PyObject *obj) { + return !PyBool_Check(obj) && PyArray_IsIntegerScalar(obj) + && !is_timedelta64_object(obj); +} + +/* +C equivalent of `isinstance(obj, (float, np.floating))` + +Parameters +---------- +obj : object + +Returns +------- +is_float : bool +*/ +int is_float_object(PyObject *obj) { + return PyFloat_Check(obj) || PyObject_TypeCheck(obj, &PyFloatingArrType_Type); +} + +/* +C equivalent of `isinstance(obj, (complex, np.complexfloating))` + +Parameters +---------- +obj : object + +Returns +------- +is_complex : bool +*/ +int is_complex_object(PyObject *obj) { + return PyComplex_Check(obj) || + PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type); +} + +/* +C equivalent of `isinstance(obj, (bool, np.bool_))` + +Parameters +---------- +obj : object + +Returns +------- +is_bool : bool +*/ +int is_bool_object(PyObject *obj) { + return PyBool_Check(obj) || PyObject_TypeCheck(obj, &PyBoolArrType_Type); +} + +int is_real_number_object(PyObject *obj) { + return is_bool_object(obj) || is_integer_object(obj) || is_float_object(obj); +} + +/* +C equivalent of `isinstance(obj, np.datetime64)` + +Parameters +---------- +obj : object + +Returns +------- +is_datetime64 : bool +*/ +int is_datetime64_object(PyObject *obj) { + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type); +} + +/* +C equivalent of `isinstance(obj, np.ndarray)` + +Parameters +---------- +obj : object + +Returns +------- +is_ndarray : bool +*/ +int is_array(PyObject *obj) { return PyArray_Check(obj); } + + +/* +Check if obj is a Not-A-Number float or complex, including +float('NaN') and np.nan. 
+ +Parameters +---------- +obj : object + +Returns +------- +is_nan : bool +*/ +int is_nan(PyObject *obj) { + if (is_float_object(obj)) { + return PyObject_RichCompareBool(obj, obj, Py_EQ); + } + + return is_complex_object(obj) && PyObject_RichCompareBool(obj, obj, Py_EQ); +} + +#ifdef __cplusplus +} +#endif +#endif // PANDAS__LIBS_PANDAS_TYPE_H_ diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index de81c611c9ee9..3c7f3859cb6b2 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,7 +54,7 @@ cdef extern from "numpy/ndarraytypes.h": int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT -cdef extern from "src/datetime/np_datetime.h": +cdef extern from "tslibs/src/datetime/np_datetime.h": ctypedef struct pandas_timedeltastruct: int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 9db3f7cb4648e..a48ac9f0de44d 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -35,7 +35,7 @@ from numpy cimport ( from pandas._libs.tslibs.util cimport get_c_string_buf_and_size -cdef extern from "src/datetime/np_datetime.h": +cdef extern from "tslibs/src/datetime/np_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) @@ -48,7 +48,7 @@ cdef extern from "src/datetime/np_datetime.h": PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype) -cdef extern from "src/datetime/np_datetime_strings.h": +cdef extern from "tslibs/src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index a28aace5d2f15..2d4a74194a55c 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,24 +1,4 @@ - -from 
cpython.object cimport PyTypeObject - - -cdef extern from *: - """ - PyObject* char_to_string(const char* data) { - return PyUnicode_FromString(data); - } - """ - object char_to_string(const char* data) - - cdef extern from "Python.h": - # Note: importing extern-style allows us to declare these as nogil - # functions, whereas `from cpython cimport` does not. - bint PyBool_Check(object obj) nogil - bint PyFloat_Check(object obj) nogil - bint PyComplex_Check(object obj) nogil - bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil - # Note that following functions can potentially raise an exception, # thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can # potentially allocate memory inside in unlikely case of when underlying @@ -30,23 +10,19 @@ cdef extern from "Python.h": object PyUnicode_DecodeLocale(const char *str, const char *errors) nogil -from numpy cimport ( - float64_t, - int64_t, -) - +from numpy cimport int64_t -cdef extern from "numpy/arrayobject.h": - PyTypeObject PyFloatingArrType_Type -cdef extern from "numpy/ndarrayobject.h": - PyTypeObject PyTimedeltaArrType_Type - PyTypeObject PyDatetimeArrType_Type - PyTypeObject PyComplexFloatingArrType_Type - PyTypeObject PyBoolArrType_Type - - bint PyArray_IsIntegerScalar(obj) nogil - bint PyArray_Check(obj) nogil +cdef extern from "pandas/type.h": + bint is_timedelta64_object(object obj) + bint is_integer_object(object obj) + bint is_float_object(object obj) + bint is_complex_object(object obj) + bint is_bool_object(object obj) + bint is_real_number_object(object obj) + bint is_datetime64_object(object obj) + bint is_array(object obj) + bint is_nan(object obj) cdef extern from "numpy/npy_common.h": int64_t NPY_MIN_INT64 @@ -59,145 +35,6 @@ cdef inline int64_t get_nat(): # -------------------------------------------------------------------- # Type Checking -cdef inline bint is_integer_object(object obj) nogil: - """ - Cython equivalent of - - `isinstance(val, (int, long, np.integer)) 
and not isinstance(val, bool)` - - Parameters - ---------- - val : object - - Returns - ------- - is_integer : bool - - Notes - ----- - This counts np.timedelta64 objects as integers. - """ - return (not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) - and not is_timedelta64_object(obj)) - - -cdef inline bint is_float_object(object obj) nogil: - """ - Cython equivalent of `isinstance(val, (float, np.complex_))` - - Parameters - ---------- - val : object - - Returns - ------- - is_float : bool - """ - return (PyFloat_Check(obj) or - (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) - - -cdef inline bint is_complex_object(object obj) nogil: - """ - Cython equivalent of `isinstance(val, (complex, np.complex_))` - - Parameters - ---------- - val : object - - Returns - ------- - is_complex : bool - """ - return (PyComplex_Check(obj) or - PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) - - -cdef inline bint is_bool_object(object obj) nogil: - """ - Cython equivalent of `isinstance(val, (bool, np.bool_))` - - Parameters - ---------- - val : object - - Returns - ------- - is_bool : bool - """ - return (PyBool_Check(obj) or - PyObject_TypeCheck(obj, &PyBoolArrType_Type)) - - -cdef inline bint is_real_number_object(object obj) nogil: - return is_bool_object(obj) or is_integer_object(obj) or is_float_object(obj) - - -cdef inline bint is_timedelta64_object(object obj) nogil: - """ - Cython equivalent of `isinstance(val, np.timedelta64)` - - Parameters - ---------- - val : object - - Returns - ------- - is_timedelta64 : bool - """ - return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - - -cdef inline bint is_datetime64_object(object obj) nogil: - """ - Cython equivalent of `isinstance(val, np.datetime64)` - - Parameters - ---------- - val : object - - Returns - ------- - is_datetime64 : bool - """ - return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - - -cdef inline bint is_array(object val): - """ - Cython equivalent of `isinstance(val, np.ndarray)` - 
- Parameters - ---------- - val : object - - Returns - ------- - is_ndarray : bool - """ - return PyArray_Check(val) - - -cdef inline bint is_nan(object val): - """ - Check if val is a Not-A-Number float or complex, including - float('NaN') and np.nan. - - Parameters - ---------- - val : object - - Returns - ------- - is_nan : bool - """ - cdef float64_t fval - if is_float_object(val): - fval = val - return fval != fval - return is_complex_object(val) and val != val - - cdef inline const char* get_c_string_buf_and_size(str py_string, Py_ssize_t *length) except NULL: """ diff --git a/setup.py b/setup.py index f8fa048757289..41c6f0ad2cc62 100755 --- a/setup.py +++ b/setup.py @@ -585,6 +585,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): sources.extend(data.get("sources", [])) include = data.get("include", []) + include.append("pandas/_libs") include.append(numpy.get_include()) undef_macros = [] From 009efd33bec27dee75697d76abd139e5ca747b13 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 23 Dec 2022 15:46:44 -0800 Subject: [PATCH 2/4] nan typo fixup --- pandas/_libs/pandas/type.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/pandas/type.h b/pandas/_libs/pandas/type.h index a0841e7aa861a..a17816b1ce4de 100644 --- a/pandas/_libs/pandas/type.h +++ b/pandas/_libs/pandas/type.h @@ -147,10 +147,10 @@ is_nan : bool */ int is_nan(PyObject *obj) { if (is_float_object(obj)) { - return PyObject_RichCompareBool(obj, obj, Py_EQ); + return PyObject_RichCompareBool(obj, obj, Py_NE); } - return is_complex_object(obj) && PyObject_RichCompareBool(obj, obj, Py_EQ); + return is_complex_object(obj) && PyObject_RichCompareBool(obj, obj, Py_NE); } #ifdef __cplusplus From 457304cc27491bb2523a19f55877c57fd4879869 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 23 Dec 2022 16:31:41 -0800 Subject: [PATCH 3/4] updated setup script --- pandas/_libs/algos.pyx | 8 +++- pandas/_libs/algos_common_helper.pxi.in | 6 +-- pandas/_libs/hashing.pyx | 5 
+- pandas/_libs/index.pyx | 16 +++++-- pandas/_libs/index_class_helper.pxi.in | 12 ++--- pandas/_libs/interval.pyx | 11 +++-- pandas/_libs/lib.pyx | 11 +++++ pandas/_libs/missing.pyx | 61 +++++++++++++++---------- pandas/_libs/parsers.pyx | 6 ++- pandas/_libs/reduction.pyx | 3 +- pandas/_libs/tslib.pyx | 11 +++-- pandas/_libs/tslibs/conversion.pyx | 11 +++-- pandas/_libs/tslibs/nattype.pyx | 44 +++++++++++------- pandas/_libs/tslibs/offsets.pyx | 23 +++++----- pandas/_libs/tslibs/period.pyx | 34 ++++++++------ pandas/_libs/tslibs/strptime.pyx | 13 ++++-- pandas/_libs/tslibs/timedeltas.pyx | 23 +++++----- pandas/_libs/tslibs/timestamps.pyx | 12 +++-- pandas/_libs/tslibs/timezones.pyx | 9 ++-- setup.py | 47 ++++++++++++------- 20 files changed, 226 insertions(+), 140 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7fcba58772ac4..8e2e5d113d58e 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -38,6 +38,12 @@ from numpy cimport ( cnp.import_array() cimport pandas._libs.util as util + + +cdef extern from "pandas/type.h": + bint is_integer_object(object obj) + bint is_array(object obj) + from pandas._libs.dtypes cimport ( numeric_object_t, numeric_t, @@ -513,7 +519,7 @@ def validate_limit(nobs: int | None, limit=None) -> int: if limit is None: lim = nobs else: - if not util.is_integer_object(limit): + if not is_integer_object(limit): raise ValueError("Limit must be an integer") if limit < 1: raise ValueError("Limit must be greater than 0") diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index ce2e1ffbb5870..ae66cb8cec211 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -12,7 +12,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in def ensure_platform_int(object arr): # GH3033, GH1392 # platform int is the size of the int pointer, e.g. 
np.intp - if util.is_array(arr): + if is_array(arr): if (arr).descr.type_num == cnp.NPY_INTP: return arr else: @@ -23,7 +23,7 @@ def ensure_platform_int(object arr): def ensure_object(object arr): - if util.is_array(arr): + if is_array(arr): if (arr).descr.type_num == NPY_OBJECT: return arr else: @@ -61,7 +61,7 @@ def get_dispatch(dtypes): def ensure_{{name}}(object arr, copy=True): - if util.is_array(arr): + if is_array(arr): if (arr).descr.type_num == NPY_{{c_type}}: return arr else: diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 197ec99247b4a..885b143a41000 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -18,7 +18,10 @@ from numpy cimport ( import_array() -from pandas._libs.util cimport is_nan +cdef extern from "pandas/type.h": + bint is_nan(object obj) + bint is_datetime64_object(object obj) + bint is_integer_object(object obj) @cython.boundscheck(False) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 9e2adee407b1a..e86c01c5d1038 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -14,7 +14,13 @@ from numpy cimport ( cnp.import_array() -from pandas._libs cimport util +cdef extern from "pandas/type.h": + bint is_integer_object(object obj) + bint is_float_object(object obj) + bint is_bool_object(object obj) + bint is_complex_object(object obj) + bint is_nan(object obj) + from pandas._libs.hashtable cimport HashTable from pandas._libs.tslibs.nattype cimport c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( @@ -74,7 +80,7 @@ cdef ndarray _get_bool_indexer(ndarray values, object val): indexer[i] = is_matching_na(item, val) else: - if util.is_nan(val): + if is_nan(val): indexer = np.isnan(values) else: indexer = values == val @@ -836,7 +842,7 @@ include "index_class_helper.pxi" cdef class BoolEngine(UInt8Engine): cdef _check_type(self, object val): - if not util.is_bool_object(val): + if not is_bool_object(val): raise KeyError(val) return val @@ -994,7 +1000,7 @@ cdef class 
SharedEngine: except KeyError: loc = -1 else: - assert util.is_integer_object(loc), (loc, val) + assert is_integer_object(loc), (loc, val) res[i] = loc return res @@ -1032,7 +1038,7 @@ cdef class SharedEngine: if isinstance(locs, slice): # Only needed for get_indexer_non_unique locs = np.arange(locs.start, locs.stop, locs.step, dtype=np.intp) - elif util.is_integer_object(locs): + elif is_integer_object(locs): locs = np.array([locs], dtype=np.intp) else: assert locs.dtype.kind == "b" diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index b9c02ba64f69c..6da1e50bab178 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -36,8 +36,8 @@ cdef class {{name}}Engine(IndexEngine): cdef _check_type(self, object val): {{if name not in {'Float64', 'Float32', 'Complex64', 'Complex128'} }} - if not util.is_integer_object(val): - if util.is_float_object(val): + if not is_integer_object(val): + if is_float_object(val): # Make sure Int64Index.get_loc(2.0) works if val.is_integer(): return int(val) @@ -48,13 +48,13 @@ cdef class {{name}}Engine(IndexEngine): raise KeyError(val) {{endif}} {{elif name not in {'Complex64', 'Complex128'} }} - if not util.is_integer_object(val) and not util.is_float_object(val): + if not is_integer_object(val) and not is_float_object(val): # in particular catch bool and avoid casting True -> 1.0 raise KeyError(val) {{else}} - if (not util.is_integer_object(val) - and not util.is_float_object(val) - and not util.is_complex_object(val) + if (not is_integer_object(val) + and not is_float_object(val) + and not is_complex_object(val) ): # in particular catch bool and avoid casting True -> 1.0 raise KeyError(val) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index fc2c486173b9d..3318f7ac8cef8 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -31,7 +31,6 @@ from numpy cimport ( cnp.import_array() -from pandas._libs cimport util 
from pandas._libs.hashtable cimport Int64Vector from pandas._libs.tslibs.timedeltas cimport _Timedelta from pandas._libs.tslibs.timestamps cimport _Timestamp @@ -42,6 +41,12 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, ) + +cdef extern from "pandas/type.h": + bint is_array(object obj) + bint is_nan(object obj) + + VALID_CLOSED = frozenset(["left", "right", "both", "neither"]) @@ -360,7 +365,7 @@ cdef class Interval(IntervalMixin): self_tuple = (self.left, self.right, self.closed) other_tuple = (other.left, other.right, other.closed) return PyObject_RichCompare(self_tuple, other_tuple, op) - elif util.is_array(other): + elif is_array(other): return np.array( [PyObject_RichCompare(self, x, op) for x in other], dtype=bool, @@ -551,7 +556,7 @@ def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True): for i in range(n): interval = intervals[i] - if interval is None or util.is_nan(interval): + if interval is None or is_nan(interval): left[i] = np.nan right[i] = np.nan continue diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index bc7b876cb5de8..65624f9971a18 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -92,6 +92,17 @@ cdef extern from "src/parse_helper.h": int floatify(object, float64_t *result, int *maybe_int) except -1 from pandas._libs cimport util + + +cdef extern from "pandas/type.h": + bint is_integer_object(object obj) + bint is_float_object(object obj) + bint is_bool_object(object obj) + bint is_datetime64_object(object obj) + bint is_timedelta64_object(object obj) + bint is_array(object obj) + + from pandas._libs.util cimport ( INT64_MAX, INT64_MIN, diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index a3b0451381ad2..e4fdf0b083240 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -4,6 +4,7 @@ from sys import maxsize cimport cython from cython cimport Py_ssize_t + import numpy as np cimport numpy as cnp @@ -17,6 +18,16 @@ from numpy cimport ( 
cnp.import_array() from pandas._libs cimport util + + +cdef extern from "pandas/type.h": + bint is_timedelta64_object(object obj) + bint is_float_object(object obj) + bint is_complex_object(object obj) + bint is_datetime64_object(object obj) + bint is_array(object obj) + bint is_nan(object obj) + from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, checknull_with_nat, @@ -89,38 +100,38 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False bool """ if left is None: - if nan_matches_none and util.is_nan(right): + if nan_matches_none and is_nan(right): return True return right is None elif left is C_NA: return right is C_NA elif left is NaT: return right is NaT - elif util.is_float_object(left): - if nan_matches_none and right is None and util.is_nan(left): + elif is_float_object(left): + if nan_matches_none and right is None and is_nan(left): return True return ( - util.is_nan(left) - and util.is_float_object(right) - and util.is_nan(right) + is_nan(left) + and is_float_object(right) + and is_nan(right) ) - elif util.is_complex_object(left): + elif is_complex_object(left): return ( - util.is_nan(left) - and util.is_complex_object(right) - and util.is_nan(right) + is_nan(left) + and is_complex_object(right) + and is_nan(right) ) - elif util.is_datetime64_object(left): + elif is_datetime64_object(left): return ( get_datetime64_value(left) == NPY_NAT - and util.is_datetime64_object(right) + and is_datetime64_object(right) and get_datetime64_value(right) == NPY_NAT and get_datetime64_unit(left) == get_datetime64_unit(right) ) - elif util.is_timedelta64_object(left): + elif is_timedelta64_object(left): return ( get_timedelta64_value(left) == NPY_NAT - and util.is_timedelta64_object(right) + and is_timedelta64_object(right) and get_timedelta64_value(right) == NPY_NAT and get_datetime64_unit(left) == get_datetime64_unit(right) ) @@ -153,15 +164,15 @@ cpdef bint checknull(object val, bint inf_as_na=False): """ if val is None or val is NaT or 
val is C_NA: return True - elif util.is_float_object(val) or util.is_complex_object(val): + elif is_float_object(val) or is_complex_object(val): if val != val: return True elif inf_as_na: return val == INF or val == NEGINF return False - elif util.is_timedelta64_object(val): + elif is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT - elif util.is_datetime64_object(val): + elif is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT else: return is_decimal_na(val) @@ -251,11 +262,11 @@ def isnaobj2d(arr: ndarray, inf_as_na: bool = False) -> ndarray: def isposinf_scalar(val: object) -> bool: - return util.is_float_object(val) and val == INF + return is_float_object(val) and val == INF def isneginf_scalar(val: object) -> bool: - return util.is_float_object(val) and val == NEGINF + return is_float_object(val) and val == NEGINF cdef bint is_null_datetime64(v): @@ -299,7 +310,7 @@ def is_float_nan(values: ndarray) -> ndarray: for i in range(N): val = values[i] - if util.is_nan(val): + if is_nan(val): result[i] = True return result.view(bool) @@ -327,7 +338,7 @@ def is_numeric_na(values: ndarray) -> ndarray: for i in range(N): val = values[i] if checknull(val): - if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val): + if val is None or val is C_NA or is_nan(val) or is_decimal_na(val): result[i] = True else: raise TypeError(f"'values' contains non-numeric NA {val}") @@ -343,7 +354,7 @@ def _create_binary_propagating_op(name, is_divmod=False): def method(self, other): if (other is C_NA or isinstance(other, (str, bytes)) or isinstance(other, (numbers.Number, np.bool_)) - or util.is_array(other) and not other.shape): + or is_array(other) and not other.shape): # Need the other.shape clause to handle NumPy scalars, # since we do a setitem on `out` below, which # won't work for NumPy scalars. 
@@ -352,7 +363,7 @@ def _create_binary_propagating_op(name, is_divmod=False): else: return NA - elif util.is_array(other): + elif is_array(other): out = np.empty(other.shape, dtype=object) out[:] = NA @@ -464,7 +475,7 @@ class NAType(C_NAType): return type(other)(1) else: return NA - elif util.is_array(other): + elif is_array(other): return np.where(other == 0, other.dtype.type(1), NA) return NotImplemented @@ -477,7 +488,7 @@ class NAType(C_NAType): return other else: return NA - elif util.is_array(other): + elif is_array(other): return np.where(other == 1, other, NA) return NotImplemented diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 1941cfde4acb9..c28f9ff036f7a 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -62,7 +62,9 @@ from numpy cimport ( cnp.import_array() -from pandas._libs cimport util +cdef extern from "pandas/type.h": + bint is_integer_object(object obj) + from pandas._libs.util cimport ( INT64_MAX, INT64_MIN, @@ -594,7 +596,7 @@ cdef class TextReader: self.parser.quotechar = ord(quote_char) cdef _make_skiprow_set(self): - if util.is_integer_object(self.skiprows): + if is_integer_object(self.skiprows): parser_set_skipfirstnrows(self.parser, self.skiprows) elif not callable(self.skiprows): for i in self.skiprows: diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 7ff0842678d7f..b33f10fbcded6 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -4,7 +4,8 @@ cimport numpy as cnp cnp.import_array() -from pandas._libs.util cimport is_array +cdef extern from "pandas/type.h": + bint is_array(object obj) cdef cnp.dtype _dtype_obj = np.dtype("object") diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c1a30e03235b5..ffce1aaef4be3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -38,11 +38,12 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, string_to_dts, ) -from pandas._libs.util cimport ( - 
is_datetime64_object, - is_float_object, - is_integer_object, -) + + +cdef extern from "pandas/type.h": + bint is_datetime64_object(object obj) + bint is_float_object(object obj) + bint is_integer_object(object obj) from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.parsing import parse_datetime_string diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6cd579d59c900..5238683a54006 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -55,11 +55,12 @@ from pandas._libs.tslibs.timezones cimport ( is_utc, maybe_get_tz, ) -from pandas._libs.tslibs.util cimport ( - is_datetime64_object, - is_float_object, - is_integer_object, -) + + +cdef extern from "pandas/type.h": + bint is_datetime64_object(object obj) + bint is_float_object(object obj) + bint is_integer_object(object obj) from pandas._libs.tslibs.parsing import parse_datetime_string diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 9407f57a282bf..eb10647e2364b 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -22,6 +22,16 @@ from numpy cimport int64_t cnp.import_array() cimport pandas._libs.tslibs.util as util + + +cdef extern from "pandas/type.h": + bint is_timedelta64_object(object obj) + bint is_integer_object(object obj) + bint is_float_object(object obj) + bint is_datetime64_object(object obj) + bint is_array(object obj) + bint is_nan(object obj) + from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_value, get_timedelta64_value, @@ -68,9 +78,9 @@ def _make_error_func(func_name: str, cls): cdef _nat_divide_op(self, other): - if PyDelta_Check(other) or util.is_timedelta64_object(other) or other is c_NaT: + if PyDelta_Check(other) or is_timedelta64_object(other) or other is c_NaT: return np.nan - if util.is_integer_object(other) or util.is_float_object(other): + if is_integer_object(other) or is_float_object(other): 
return c_NaT return NotImplemented @@ -96,15 +106,15 @@ cdef class _NaT(datetime): __array_priority__ = 100 def __richcmp__(_NaT self, object other, int op): - if util.is_datetime64_object(other) or PyDateTime_Check(other): + if is_datetime64_object(other) or PyDateTime_Check(other): # We treat NaT as datetime-like for this comparison return op == Py_NE - elif util.is_timedelta64_object(other) or PyDelta_Check(other): + elif is_timedelta64_object(other) or PyDelta_Check(other): # We treat NaT as timedelta-like for this comparison return op == Py_NE - elif util.is_array(other): + elif is_array(other): if other.dtype.kind in "mM": result = np.empty(other.shape, dtype=np.bool_) result.fill(op == Py_NE) @@ -138,14 +148,14 @@ cdef class _NaT(datetime): return c_NaT elif PyDelta_Check(other): return c_NaT - elif util.is_datetime64_object(other) or util.is_timedelta64_object(other): + elif is_datetime64_object(other) or is_timedelta64_object(other): return c_NaT - elif util.is_integer_object(other): + elif is_integer_object(other): # For Period compat return c_NaT - elif util.is_array(other): + elif is_array(other): if other.dtype.kind in "mM": # If we are adding to datetime64, we treat NaT as timedelta # Either way, result dtype is datetime64 @@ -176,14 +186,14 @@ cdef class _NaT(datetime): return c_NaT elif PyDelta_Check(other): return c_NaT - elif util.is_datetime64_object(other) or util.is_timedelta64_object(other): + elif is_datetime64_object(other) or is_timedelta64_object(other): return c_NaT - elif util.is_integer_object(other): + elif is_integer_object(other): # For Period compat return c_NaT - elif util.is_array(other): + elif is_array(other): if other.dtype.kind == "m": if not is_rsub: # NaT - timedelta64 we treat NaT as datetime64, so result @@ -216,7 +226,7 @@ cdef class _NaT(datetime): return NotImplemented def __rsub__(self, other): - if util.is_array(other): + if is_array(other): if other.dtype.kind == "m": # timedelta64 - NaT we have to treat NaT as 
timedelta64 # for this to be meaningful, and the result is timedelta64 @@ -247,7 +257,7 @@ cdef class _NaT(datetime): return _nat_divide_op(self, other) def __mul__(self, other): - if util.is_integer_object(other) or util.is_float_object(other): + if is_integer_object(other) or is_float_object(other): return NaT return NotImplemented @@ -377,7 +387,7 @@ class NaTType(_NaT): return _nat_rdivide_op(self, other) def __rmul__(self, other): - if util.is_integer_object(other) or util.is_float_object(other): + if is_integer_object(other) or is_float_object(other): return c_NaT return NotImplemented @@ -1220,14 +1230,14 @@ cdef bint checknull_with_nat(object val): """ Utility to check if a value is a nat or not. """ - return val is None or util.is_nan(val) or val is c_NaT + return val is None or is_nan(val) or val is c_NaT cdef bint is_dt64nat(object val): """ Is this a np.datetime64 object np.datetime64("NaT"). """ - if util.is_datetime64_object(val): + if is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT return False @@ -1236,6 +1246,6 @@ cdef bint is_td64nat(object val): """ Is this a np.timedelta64 object np.timedelta64("NaT"). 
""" - if util.is_timedelta64_object(val): + if is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT return False diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 470d1e89e5b88..87da874635c34 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -31,12 +31,13 @@ cnp.import_array() from pandas._libs.properties import cache_readonly -from pandas._libs.tslibs cimport util -from pandas._libs.tslibs.util cimport ( - is_datetime64_object, - is_float_object, - is_integer_object, -) + +cdef extern from "pandas/type.h": + bint is_datetime64_object(object obj) + bint is_float_object(object obj) + bint is_integer_object(object obj) + bint is_timedelta64_object(object obj) + bint is_array(object obj) from pandas._libs.tslibs.ccalendar import ( MONTH_ALIASES, @@ -143,7 +144,7 @@ def apply_wraps(func): elif ( isinstance(other, BaseOffset) or PyDelta_Check(other) - or util.is_timedelta64_object(other) + or is_timedelta64_object(other) ): # timedelta path return func(self, other) @@ -479,7 +480,7 @@ cdef class BaseOffset: # TODO(cython3): remove this, this moved to __radd__ return other.__add__(self) - elif util.is_array(other) and other.dtype == object: + elif is_array(other) and other.dtype == object: return np.array([self + x for x in other]) try: @@ -508,7 +509,7 @@ cdef class BaseOffset: return (-self).__add__(other) def __mul__(self, other): - if util.is_array(other): + if is_array(other): return np.array([self * x for x in other]) elif is_integer_object(other): return type(self)(n=other * self.n, normalize=self.normalize, @@ -746,7 +747,7 @@ cdef class BaseOffset: TypeError if `int(n)` raises ValueError if n != int(n) """ - if util.is_timedelta64_object(n): + if is_timedelta64_object(n): raise TypeError(f"`n` argument must be an integer, got {type(n)}") try: nint = int(n) @@ -1071,7 +1072,7 @@ cdef class Tick(SingleConstructorOffset): # PyDate_Check includes date, datetime return 
Timestamp(other) + self - if util.is_timedelta64_object(other) or PyDelta_Check(other): + if is_timedelta64_object(other) or PyDelta_Check(other): return other + self.delta raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 8955fb678d075..63d7053003dc3 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -39,7 +39,15 @@ from libc.time cimport ( # import datetime C API import_datetime() -cimport pandas._libs.tslibs.util as util +cimport pandas._libs.util as util + + +cdef extern from "pandas/type.h": + bint is_datetime64_object(object obj) + bint is_timedelta64_object(object obj) + bint is_integer_object(object obj) + bint is_array(object obj) + from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, @@ -1474,7 +1482,7 @@ cdef int64_t _extract_ordinal(object item, str freqstr, freq) except? -1: if checknull_with_nat(item) or item is C_NA: ordinal = NPY_NAT - elif util.is_integer_object(item): + elif is_integer_object(item): if item == NPY_NAT: ordinal = NPY_NAT else: @@ -1667,7 +1675,7 @@ cdef class _Period(PeriodMixin): return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) elif other is NaT: return op == Py_NE - elif util.is_array(other): + elif is_array(other): # GH#44285 if cnp.PyArray_IsZeroDim(other): return PyObject_RichCompare(self, other.item(), op) @@ -1688,7 +1696,7 @@ cdef class _Period(PeriodMixin): f"Period(freq={self.freqstr})") if ( - util.is_timedelta64_object(other) and + is_timedelta64_object(other) and get_timedelta64_value(other) == NPY_NAT ): # i.e. 
np.timedelta64("nat") @@ -1727,7 +1735,7 @@ cdef class _Period(PeriodMixin): return self._add_offset(other) elif other is NaT: return NaT - elif util.is_integer_object(other): + elif is_integer_object(other): ordinal = self.ordinal + other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) @@ -1741,7 +1749,7 @@ cdef class _Period(PeriodMixin): raise TypeError(f"unsupported operand type(s) for +: '{sname}' " f"and '{oname}'") - elif util.is_array(other): + elif is_array(other): if other.dtype == object: # GH#50162 return np.array([self + x for x in other], dtype=object) @@ -1762,7 +1770,7 @@ cdef class _Period(PeriodMixin): elif ( is_any_td_scalar(other) or is_offset_object(other) - or util.is_integer_object(other) + or is_integer_object(other) ): return self + (-other) elif is_period_object(other): @@ -1772,7 +1780,7 @@ cdef class _Period(PeriodMixin): elif other is NaT: return NaT - elif util.is_array(other): + elif is_array(other): if other.dtype == object: # GH#50162 return np.array([self - x for x in other], dtype=object) @@ -1783,7 +1791,7 @@ cdef class _Period(PeriodMixin): if other is NaT: return NaT - elif util.is_array(other): + elif is_array(other): if other.dtype == object: # GH#50162 return np.array([x - self for x in other], dtype=object) @@ -2543,7 +2551,7 @@ class Period(_Period): raise ValueError("Only value or ordinal but not both should be " "given but not both") elif ordinal is not None: - if not util.is_integer_object(ordinal): + if not is_integer_object(ordinal): raise ValueError("Ordinal must be an integer") if freq is None: raise ValueError("Must supply freq for ordinal value") @@ -2582,8 +2590,8 @@ class Period(_Period): # if we have a non-hashable value. 
ordinal = NPY_NAT - elif isinstance(value, str) or util.is_integer_object(value): - if util.is_integer_object(value): + elif isinstance(value, str) or is_integer_object(value): + if is_integer_object(value): if value == NPY_NAT: value = "NaT" @@ -2615,7 +2623,7 @@ class Period(_Period): raise ValueError("Must supply freq for datetime value") if isinstance(dt, Timestamp): nanosecond = dt.nanosecond - elif util.is_datetime64_object(value): + elif is_datetime64_object(value): dt = Timestamp(value) if freq is None: raise ValueError("Must supply freq for datetime value") diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 73e9176d3a6d2..f65800a073f1f 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -41,13 +41,16 @@ from pandas._libs.tslibs.np_datetime cimport ( pydate_to_dt64, pydatetime_to_dt64, ) + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + from pandas._libs.tslibs.timestamps cimport _Timestamp -from pandas._libs.util cimport ( - is_datetime64_object, - is_float_object, - is_integer_object, -) + + +cdef extern from "pandas/type.h": + bint is_datetime64_object(object obj) + bint is_float_object(object obj) + bint is_integer_object(object obj) cnp.import_array() diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7810bc9f75e66..52fb2bffee9c9 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -28,8 +28,14 @@ from cpython.datetime cimport ( import_datetime() +cdef extern from "pandas/type.h": + bint is_array(object obj) + bint is_nan(object obj) + bint is_datetime64_object(object obj) + bint is_timedelta64_object(object obj) + bint is_float_object(object obj) + bint is_integer_object(object obj) -cimport pandas._libs.tslibs.util as util from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( cast_from_unit, @@ -66,13 +72,6 @@ from 
pandas._libs.tslibs.np_datetime import ( ) from pandas._libs.tslibs.offsets cimport is_tick_object -from pandas._libs.tslibs.util cimport ( - is_array, - is_datetime64_object, - is_float_object, - is_integer_object, - is_timedelta64_object, -) from pandas._libs.tslibs.fields import ( RoundTo, @@ -1129,7 +1128,7 @@ cdef class _Timedelta(timedelta): elif other is NaT: return op == Py_NE - elif util.is_array(other): + elif is_array(other): if other.dtype.kind == "m": return PyObject_RichCompare(self.asm8, other, op) elif other.dtype.kind == "O": @@ -1849,7 +1848,7 @@ class Timedelta(_Timedelta): def __mul__(self, other): if is_integer_object(other) or is_float_object(other): - if util.is_nan(other): + if is_nan(other): # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT return NaT @@ -1882,7 +1881,7 @@ class Timedelta(_Timedelta): elif is_integer_object(other) or is_float_object(other): # integers or floats - if util.is_nan(other): + if is_nan(other): return NaT return Timedelta._from_value_and_reso( (self.value / other), self._creso @@ -1936,7 +1935,7 @@ class Timedelta(_Timedelta): return self.value // other.value elif is_integer_object(other) or is_float_object(other): - if util.is_nan(other): + if is_nan(other): return NaT return type(self)._from_value_and_reso(self.value // other, self._creso) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7593be7bf77f3..3605c0a75f2e7 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -8,6 +8,7 @@ shadows the python class, where we do any heavy lifting. 
""" import warnings + cimport cython import numpy as np @@ -60,11 +61,12 @@ from pandas._libs.tslibs.dtypes cimport ( periods_per_day, periods_per_second, ) -from pandas._libs.tslibs.util cimport ( - is_array, - is_datetime64_object, - is_integer_object, -) + + +cdef extern from "pandas/type.h": + bint is_array(object obj) + bint is_datetime64_object(object obj) + bint is_integer_object(object obj) from pandas._libs.tslibs.fields import ( RoundTo, diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 6105f96a3b1b8..6be112c3f999a 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -37,10 +37,11 @@ from numpy cimport int64_t cnp.import_array() # ---------------------------------------------------------------------- -from pandas._libs.tslibs.util cimport ( - get_nat, - is_integer_object, -) +from pandas._libs.tslibs.util cimport get_nat + + +cdef extern from "pandas/type.h": + bint is_integer_object(object obj) cdef int64_t NPY_NAT = get_nat() diff --git a/setup.py b/setup.py index 41c6f0ad2cc62..bb36facd91637 100755 --- a/setup.py +++ b/setup.py @@ -442,11 +442,17 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.algos": { "pyxfile": "_libs/algos", "include": klib_include, - "depends": _pxi_dep["algos"], + "depends": _pxi_dep["algos"] + + [ + "pandas/_libs/src/pandas/type.h", + ], }, "_libs.arrays": {"pyxfile": "_libs/arrays"}, "_libs.groupby": {"pyxfile": "_libs/groupby"}, - "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []}, + "_libs.hashing": { + "pyxfile": "_libs/hashing", + "depends": ["pandas/_libs/src/pandas/type.h"], + }, "_libs.hashtable": { "pyxfile": "_libs/hashtable", "include": klib_include, @@ -458,23 +464,26 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.index": { "pyxfile": "_libs/index", "include": klib_include, - "depends": _pxi_dep["index"], + "depends": _pxi_dep["index"] + ["pandas/_libs/src/pandas/type.h"], }, "_libs.indexing": 
{"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, "_libs.interval": { "pyxfile": "_libs/interval", "include": klib_include, - "depends": _pxi_dep["interval"], + "depends": _pxi_dep["interval"] + ["pandas/_libs/src/pandas/type.h"], }, "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, "_libs.lib": { "pyxfile": "_libs/lib", - "depends": lib_depends + tseries_depends, + "depends": lib_depends + tseries_depends + ["pandas/_libs/src/pandas/type.h"], "include": klib_include, # due to tokenizer import "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, - "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends}, + "_libs.missing": { + "pyxfile": "_libs/missing", + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], + }, "_libs.parsers": { "pyxfile": "_libs/parsers", "include": klib_include + ["pandas/_libs/src"], @@ -487,7 +496,10 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas/_libs/src/parser/io.c", ], }, - "_libs.reduction": {"pyxfile": "_libs/reduction"}, + "_libs.reduction": { + "pyxfile": "_libs/reduction", + "depends": ["pandas/_libs/src/pandas/type.h"], + }, "_libs.ops": {"pyxfile": "_libs/ops"}, "_libs.ops_dispatch": {"pyxfile": "_libs/ops_dispatch"}, "_libs.properties": {"pyxfile": "_libs/properties"}, @@ -495,7 +507,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.sparse": {"pyxfile": "_libs/sparse", "depends": _pxi_dep["sparse"]}, "_libs.tslib": { "pyxfile": "_libs/tslib", - "depends": tseries_depends, + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.base": {"pyxfile": "_libs/tslibs/base"}, @@ -503,7 +515,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.dtypes": {"pyxfile": "_libs/tslibs/dtypes"}, "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", - "depends": tseries_depends, + "depends": tseries_depends + 
["pandas/_libs/src/pandas/type.h"], "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.fields": { @@ -514,7 +526,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"}, "_libs.tslibs.np_datetime": { "pyxfile": "_libs/tslibs/np_datetime", - "depends": tseries_depends, + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], "sources": [ "pandas/_libs/tslibs/src/datetime/np_datetime.c", "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", @@ -522,7 +534,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.tslibs.offsets": { "pyxfile": "_libs/tslibs/offsets", - "depends": tseries_depends, + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.parsing": { @@ -533,25 +545,28 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.tslibs.period": { "pyxfile": "_libs/tslibs/period", - "depends": tseries_depends, + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.strptime": { "pyxfile": "_libs/tslibs/strptime", - "depends": tseries_depends, + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timedeltas": { "pyxfile": "_libs/tslibs/timedeltas", - "depends": tseries_depends, + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timestamps": { "pyxfile": "_libs/tslibs/timestamps", - "depends": tseries_depends, + "depends": tseries_depends + ["pandas/_libs/src/pandas/type.h"], "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, - "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"}, + "_libs.tslibs.timezones": { + "pyxfile": "_libs/tslibs/timezones", + 
"depends": ["pandas/_libs/src/pandas/type.h"], }, "_libs.tslibs.tzconversion": { "pyxfile": "_libs/tslibs/tzconversion", "depends": tseries_depends, From 2ed37aba95e5b0b490610a71fc7214a707c36b51 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 23 Dec 2022 16:52:21 -0800 Subject: [PATCH 4/4] proper NA handling --- pandas/_libs/pandas/type.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/pandas/type.h b/pandas/_libs/pandas/type.h index a17816b1ce4de..d238ae37fead4 100644 --- a/pandas/_libs/pandas/type.h +++ b/pandas/_libs/pandas/type.h @@ -9,6 +9,7 @@ The full license is in the LICENSE file, distributed with this software. #ifndef PANDAS__LIBS_PANDAS_TYPE_H_ #define PANDAS__LIBS_PANDAS_TYPE_H_ +#include "pyerrors.h" #ifdef __cplusplus extern "C" { #endif @@ -147,10 +148,22 @@ is_nan : bool */ int is_nan(PyObject *obj) { if (is_float_object(obj)) { - return PyObject_RichCompareBool(obj, obj, Py_NE); + double fobj = PyFloat_AsDouble(obj); + if (fobj == -1.0 && PyErr_Occurred()) { + // TODO(wayd): handle this error! + } + + return fobj != fobj; + } + + if (is_complex_object(obj)) { + int ret = PyObject_RichCompareBool(obj, obj, Py_NE); + // ret is -1 if the comparison raised. TODO(wayd): handle this error! + // Only an explicit "not equal to itself" result (1) means NaN. + return ret == 1; } - return is_complex_object(obj) && PyObject_RichCompareBool(obj, obj, Py_NE); + return 0; } #ifdef __cplusplus