diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index ecfc7355dddfc..124792638e3df 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -129,7 +129,7 @@ def is_lexsorted(list list_of_arrays): for i in range(nlevels): arr = list_of_arrays[i] assert arr.dtype.name == 'int64' - vecs[i] = arr.data + vecs[i] = cnp.PyArray_DATA(arr) # Assume uniqueness?? with nogil: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 5e4a431caca00..5681d01c6bb25 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -7,10 +7,12 @@ from cython cimport Py_ssize_t from libc.stdlib cimport malloc, free import numpy as np +cimport numpy as cnp from numpy cimport (ndarray, double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) +cnp.import_array() from util cimport numeric, get_nat @@ -118,7 +120,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, counts[:] = _counts[1:] data = np.empty((K, N), dtype=np.float64) - ptr = data.data + ptr = cnp.PyArray_DATA(data) take_2d_axis1_float64_float64(values.T, indexer, out=data) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 31ef4b7a3e807..5918560cf1436 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -37,7 +37,7 @@ cdef inline bint is_definitely_invalid_key(object val): return True # we have a _data, means we are a NDFrame - return (PySlice_Check(val) or cnp.PyArray_Check(val) + return (PySlice_Check(val) or util.is_array(val) or PyList_Check(val) or hasattr(val, '_data')) @@ -104,7 +104,7 @@ cdef class IndexEngine: void* data_ptr loc = self.get_loc(key) - if PySlice_Check(loc) or cnp.PyArray_Check(loc): + if PySlice_Check(loc) or util.is_array(loc): return arr[loc] else: return get_value_at(arr, loc, tz=tz) @@ -120,7 +120,7 @@ cdef class IndexEngine: loc = self.get_loc(key) value = convert_scalar(arr, value) - if PySlice_Check(loc) or cnp.PyArray_Check(loc): + if PySlice_Check(loc) or 
util.is_array(loc): arr[loc] = value else: util.set_value_at(arr, loc, value) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 98eca92fd1ab2..753cba6ce62aa 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -16,8 +16,6 @@ The full license is in the LICENSE file, distributed with this software. #include "numpy/arrayscalars.h" -PANDAS_INLINE npy_int64 get_nat(void) { return NPY_MIN_INT64; } - PANDAS_INLINE int assign_value_1d(PyArrayObject* ap, Py_ssize_t _i, PyObject* v) { npy_intp i = (npy_intp)_i; @@ -40,16 +38,10 @@ PANDAS_INLINE const char* get_c_string(PyObject* obj) { #endif } -PANDAS_INLINE PyObject* char_to_string(const char* data) { -#if PY_VERSION_HEX >= 0x03000000 - return PyUnicode_FromString(data); -#else - return PyString_FromString(data); -#endif -} - void set_array_not_contiguous(PyArrayObject* ao) { - ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); + // Numpy>=1.8-compliant equivalent to: + // ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); + PyArray_CLEARFLAGS(ao, (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)); } #endif // PANDAS__LIBS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7621ac912d4d5..4335e7baeafe9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -888,7 +888,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, trans, deltas, typ = get_dst_info(tz) - tdata = trans.data + tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) result_a = np.empty(n, dtype=np.int64) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index efdb1570ed878..624ed7ced2654 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,10 +1,18 @@ -from numpy cimport ndarray -cimport numpy as cnp -cnp.import_array() -cimport cpython from cpython cimport PyTypeObject +cdef extern 
from *: + """ + PyObject* char_to_string(const char* data) { + #if PY_VERSION_HEX >= 0x03000000 + return PyUnicode_FromString(data); + #else + return PyString_FromString(data); + #endif + } + """ + object char_to_string(const char* data) + cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil @@ -19,6 +27,8 @@ cdef extern from "Python.h": cdef extern from "numpy/arrayobject.h": PyTypeObject PyFloatingArrType_Type + ctypedef signed long long int64_t + int _import_array() except -1 cdef extern from "numpy/ndarrayobject.h": PyTypeObject PyTimedeltaArrType_Type @@ -29,142 +39,177 @@ cdef extern from "numpy/ndarrayobject.h": bint PyArray_IsIntegerScalar(obj) nogil bint PyArray_Check(obj) nogil +cdef extern from "numpy/npy_common.h": + int64_t NPY_MIN_INT64 + + +cdef extern from "../src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + + +cdef inline int64_t get_nat(): + return NPY_MIN_INT64 + + +cdef inline int import_array() except -1: + _import_array() + + # -------------------------------------------------------------------- # Type Checking cdef inline bint is_string_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, compat.string_types)` + + Parameters + ---------- + val : object + + Returns + ------- + is_string : bool + """ return PyString_Check(obj) or PyUnicode_Check(obj) cdef inline bint is_integer_object(object obj) nogil: + """ + Cython equivalent of + + `isinstance(val, (int, long, np.integer)) and not isinstance(val, bool)` + + Parameters + ---------- + val : object + + Returns + ------- + is_integer : bool + + Notes + ----- + This counts np.timedelta64 objects as integers. 
+ """ return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) cdef inline bint is_float_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (float, np.floating))` + + Parameters + ---------- + val : object + + Returns + ------- + is_float : bool + """ return (PyFloat_Check(obj) or (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) cdef inline bint is_complex_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (complex, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_complex : bool + """ return (PyComplex_Check(obj) or PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) cdef inline bint is_bool_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (bool, np.bool_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_bool : bool + """ return (PyBool_Check(obj) or PyObject_TypeCheck(obj, &PyBoolArrType_Type)) cdef inline bint is_timedelta64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - - -cdef inline bint is_datetime64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - -# -------------------------------------------------------------------- - -cdef extern from "../src/numpy_helper.h": - void set_array_not_contiguous(ndarray ao) - - int assign_value_1d(ndarray, Py_ssize_t, object) except -1 - cnp.int64_t get_nat() - object get_value_1d(ndarray, Py_ssize_t) - const char *get_c_string(object) except NULL - object char_to_string(char*) - -ctypedef fused numeric: - cnp.int8_t - cnp.int16_t - cnp.int32_t - cnp.int64_t - - cnp.uint8_t - cnp.uint16_t - cnp.uint32_t - cnp.uint64_t - - cnp.float32_t - cnp.float64_t - -cdef extern from "../src/headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - - 
-cdef inline object get_value_at(ndarray arr, object loc): - cdef: - Py_ssize_t i, sz - int casted + """ + Cython equivalent of `isinstance(val, np.timedelta64)` - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) + Parameters + ---------- + val : object - if i < 0 and sz > 0: - i += sz - elif i >= sz or sz == 0: - raise IndexError('index out of bounds') + Returns + ------- + is_timedelta64 : bool + """ + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - return get_value_1d(arr, i) +cdef inline bint is_datetime64_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, np.datetime64)` -cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): - """Sets a value into the array without checking the writeable flag. + Parameters + ---------- + val : object - This should be used when setting values in a loop, check the writeable - flag above the loop and then eschew the check on each iteration. + Returns + ------- + is_datetime64 : bool """ - cdef: - Py_ssize_t i, sz - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0: - i += sz - elif i >= sz: - raise IndexError('index out of bounds') + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - assign_value_1d(arr, i, value) -cdef inline set_value_at(ndarray arr, object loc, object value): - """Sets a value into the array after checking that the array is mutable. 
+cdef inline bint is_array(object val): """ - if not cnp.PyArray_ISWRITEABLE(arr): - raise ValueError('assignment destination is read-only') - - set_value_at_unsafe(arr, loc, value) + Cython equivalent of `isinstance(val, np.ndarray)` + Parameters + ---------- + val : object -cdef inline is_array(object o): - return cnp.PyArray_Check(o) + Returns + ------- + is_ndarray : bool + """ + return PyArray_Check(val) -cdef inline bint _checknull(object val): - try: - return val is None or (cpython.PyFloat_Check(val) and val != val) - except ValueError: - return False +cdef inline bint is_period_object(object val): + """ + Cython equivalent of `isinstance(val, pd.Period)` + Parameters + ---------- + val : object -cdef inline bint is_period_object(object val): + Returns + ------- + is_period : bool + """ return getattr(val, '_typ', '_typ') == 'period' @@ -181,3 +226,7 @@ cdef inline bint is_offset_object(object val): is_date_offset : bool """ return getattr(val, '_typ', None) == "dateoffset" + + +cdef inline bint _checknull(object val): + return val is None or (PyFloat_Check(val) and val != val) diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 0b7e66902cbb1..134f34330d8aa 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -1 +1,81 @@ from tslibs.util cimport * + +from cython cimport Py_ssize_t + +cimport numpy as cnp +from numpy cimport ndarray + + +cdef extern from "src/numpy_helper.h": + void set_array_not_contiguous(ndarray ao) + + int assign_value_1d(ndarray, Py_ssize_t, object) except -1 + object get_value_1d(ndarray, Py_ssize_t) + const char *get_c_string(object) except NULL + + +ctypedef fused numeric: + cnp.int8_t + cnp.int16_t + cnp.int32_t + cnp.int64_t + + cnp.uint8_t + cnp.uint16_t + cnp.uint32_t + cnp.uint64_t + + cnp.float32_t + cnp.float64_t + + +cdef inline object get_value_at(ndarray arr, object loc): + cdef: + Py_ssize_t i, sz + int casted + + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i 
= loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0 and sz > 0: + i += sz + elif i >= sz or sz == 0: + raise IndexError('index out of bounds') + + return get_value_1d(arr, i) + + +cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): + """Sets a value into the array without checking the writeable flag. + + This should be used when setting values in a loop, check the writeable + flag above the loop and then eschew the check on each iteration. + """ + cdef: + Py_ssize_t i, sz + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0: + i += sz + elif i >= sz: + raise IndexError('index out of bounds') + + assign_value_1d(arr, i, value) + + +cdef inline set_value_at(ndarray arr, object loc, object value): + """Sets a value into the array after checking that the array is mutable. + """ + if not cnp.PyArray_ISWRITEABLE(arr): + raise ValueError('assignment destination is read-only') + + set_value_at_unsafe(arr, loc, value)