From fa38001ca5fdbc09824de79ef749e77c0688b8fd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 6 Nov 2018 19:17:08 -0800 Subject: [PATCH 01/13] use float64_t instead of double --- pandas/_libs/algos.pyx | 7 +- pandas/_libs/groupby.pyx | 7 +- pandas/_libs/groupby_helper.pxi.in | 2 +- pandas/_libs/hashtable.pyx | 4 +- pandas/_libs/interval.pyx | 15 ++- pandas/_libs/intervaltree.pxi.in | 25 +--- pandas/_libs/join.pyx | 6 +- pandas/_libs/lib.pyx | 4 +- pandas/_libs/missing.pyx | 6 +- pandas/_libs/parsers.pyx | 36 +++--- pandas/_libs/window.pyx | 196 ++++++++++++++++------------- 11 files changed, 163 insertions(+), 145 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 075e2c5129579..fc73be9a21e63 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -15,8 +15,7 @@ from numpy cimport (ndarray, NPY_FLOAT32, NPY_FLOAT64, NPY_OBJECT, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t, - double_t) + uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -32,8 +31,8 @@ import missing cdef float64_t FP_ERR = 1e-13 -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN +cdef float64_t nan = NaN cdef int64_t iNaT = get_nat() diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 83ded64b742ed..bc15eee04d42d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -8,7 +8,6 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp from numpy cimport (ndarray, - double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -22,8 +21,8 @@ from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers cdef int64_t iNaT = get_nat() -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN +cdef float64_t nan = NaN cdef inline float64_t median_linear(float64_t* a, int n) nogil: @@ -73,7 +72,7 @@ cdef 
inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t n) nogil: cdef: Py_ssize_t i, j, l, m - double_t x, t + float64_t x, t l = 0 m = n - 1 diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 484a4b069305f..39e408660fd87 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" _int64_max = np.iinfo(np.int64).max # ---------------------------------------------------------------------- diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index d38b72ccebbb2..c4dcd24142546 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -9,11 +9,11 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint32_t +from numpy cimport ndarray, uint8_t, uint32_t, float64_t cnp.import_array() cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" from khash cimport ( diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index a395fdbabeca2..dae88d3b707bf 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,20 +1,27 @@ # -*- coding: utf-8 -*- import numbers +from operator import le, lt from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, PyObject_RichCompare) -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np -from numpy cimport ndarray +cimport numpy as cnp +from numpy cimport ( + int64_t, int32_t, float64_t, float32_t, uint64_t, + ndarray, + PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) +cnp.import_array() -from operator import le, lt cimport util util.import_array() +from hashtable cimport Int64Vector, Int64VectorData + from tslibs import Timestamp from tslibs.timezones cimport tz_compare 
diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 875848c00311f..d262ebef2bacf 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -4,21 +4,6 @@ Template for intervaltree WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -from numpy cimport ( - int64_t, int32_t, float64_t, float32_t, uint64_t, - ndarray, - PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) -import numpy as np - -cimport cython -from cython cimport Py_ssize_t - -cimport numpy as cnp -cnp.import_array() - -from hashtable cimport Int64Vector, Int64VectorData - - ctypedef fused scalar_t: float64_t float32_t @@ -26,10 +11,9 @@ ctypedef fused scalar_t: int32_t uint64_t - -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IntervalTree -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef class IntervalTree(IntervalMixin): """A centered interval tree @@ -202,9 +186,10 @@ cdef sort_values_and_indices(all_values, all_indices, subset): sorted_indices = take(indices, sorter) return sorted_values, sorted_indices -#---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- # Nodes -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # we need specialized nodes and leaves to optimize for different dtype and # closed values diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 748f3f265dd34..4a0944c6b1274 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -11,8 +11,8 @@ from numpy cimport (ndarray, cnp.import_array() -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN +cdef float64_t nan = NaN from 
pandas._libs.algos import groupsort_indexer, ensure_platform_int from pandas.core.algorithms import take_nd @@ -673,7 +673,7 @@ ctypedef fused asof_t: int32_t int64_t float - double + float64_t ctypedef fused by_t: object diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9e0fcbc4a826..8265e193fbec0 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -45,7 +45,7 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "src/parse_helper.h": - int floatify(object, double *result, int *maybe_int) except -1 + int floatify(object, float64_t *result, int *maybe_int) except -1 cimport util from util cimport (is_nan, @@ -71,7 +71,7 @@ cdef int64_t NPY_NAT = util.get_nat() iNaT = util.get_nat() cdef bint PY2 = sys.version_info[0] == 2 -cdef double nan = np.NaN +cdef float64_t nan = np.NaN def values_from_object(obj: object): diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index b8791359241ad..30c3a5ca541ae 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -5,7 +5,7 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t, uint8_t +from numpy cimport ndarray, int64_t, uint8_t, float64_t cnp.import_array() cimport util @@ -13,8 +13,8 @@ cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value from tslibs.nattype import NaT -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef int64_t NPY_NAT = util.get_nat() diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 391de339ad60e..3870a55c22fd6 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -65,8 +65,8 @@ CParserError = ParserError cdef bint PY3 = (sys.version_info[0] >= 3) -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef extern from "errno.h": @@ -182,10 +182,10 @@ cdef extern from "parser/tokenizer.h": int64_t 
skip_first_N_rows int64_t skipfooter # pick one, depending on whether the converter requires GIL - double (*double_converter_nogil)(const char *, char **, - char, char, char, int) nogil - double (*double_converter_withgil)(const char *, char **, - char, char, char, int) + float64_t (*double_converter_nogil)(const char *, char **, + char, char, char, int) nogil + float64_t (*double_converter_withgil)(const char *, char **, + char, char, char, int) # error handling char *warn_msg @@ -233,12 +233,12 @@ cdef extern from "parser/tokenizer.h": uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, uint64_t uint_max, int *error, char tsep) nogil - double xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double precise_xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double round_trip(const char *p, char **q, char decimal, char sci, + float64_t xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing) nogil + float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing) nogil + float64_t round_trip(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing) nogil int to_boolean(const char *item, uint8_t *val) nogil @@ -1697,8 +1697,8 @@ cdef _try_double(parser_t *parser, int64_t col, coliter_t it const char *word = NULL char *p_end - double *data - double NA = na_values[np.float64] + float64_t *data + float64_t NA = na_values[np.float64] kh_float64_t *na_fset ndarray result khiter_t k @@ -1706,7 +1706,7 @@ cdef _try_double(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.float64) - data = result.data + data = result.data na_fset = kset_float64_from_list(na_flist) if parser.double_converter_nogil != NULL: # if it can run without the GIL with nogil: @@ -1717,8 +1717,8 @@ cdef _try_double(parser_t *parser, int64_t col, else: 
assert parser.double_converter_withgil != NULL error = _try_double_nogil(parser, - parser.double_converter_withgil, col, line_start, line_end, na_filter, na_hashset, use_na_flist, @@ -1730,14 +1730,14 @@ cdef _try_double(parser_t *parser, int64_t col, cdef inline int _try_double_nogil(parser_t *parser, - double (*double_converter)( + float64_t (*double_converter)( const char *, char **, char, char, char, int) nogil, int col, int line_start, int line_end, bint na_filter, kh_str_t *na_hashset, bint use_na_flist, const kh_float64_t *na_flist, - double NA, double *data, + float64_t NA, float64_t *data, int *na_count) nogil: cdef: int error, diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index bb7af67d14585..f517e0933264a 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -9,15 +9,15 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport ndarray, double_t, int64_t, float64_t, float32_t +from numpy cimport ndarray, int64_t, float64_t, float32_t cnp.import_array() cdef extern from "src/headers/cmath" namespace "std": - bint isnan(double) nogil - bint notnan(double) nogil - int signbit(double) nogil - double sqrt(double x) nogil + bint isnan(float64_t) nogil + bint notnan(float64_t) nogil + int signbit(float64_t) nogil + float64_t sqrt(float64_t x) nogil cimport util from util cimport numeric @@ -32,7 +32,7 @@ cdef float64_t MINfloat64 = np.NINF cdef float32_t MAXfloat32 = np.inf cdef float64_t MAXfloat64 = np.inf -cdef double NaN = np.NaN +cdef float64_t NaN = np.NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b @@ -80,6 +80,7 @@ def _check_minp(win, minp, N, floor=None): return max(minp, floor) + # original C implementation by N. Devillard. # This code in public domain. 
# Function : kth_smallest() @@ -352,19 +353,20 @@ def get_window_indexer(values, win, minp, index, closed, right_closed, index, floor) return indexer.get_data() + # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware -def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_count(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, count_x = 0.0 + float64_t val, count_x = 0.0 int64_t s, e, nobs, N Py_ssize_t i, j ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, _ = get_window_indexer(values, win, minp, index, closed) @@ -406,12 +408,15 @@ def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling sum -cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: - cdef double result +cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, + float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: result = sum_x @@ -421,7 +426,7 @@ cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: return result -cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogil: """ add a value from the sum calc """ # Not NaN @@ -430,7 +435,8 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] + val -cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void remove_sum(float64_t val, + int64_t *nobs, float64_t *sum_x) nogil: """ remove a value from the sum calc """ if notnan(val): @@ -438,15 +444,15 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] - val -def roll_sum(ndarray[double_t] 
values, int64_t win, int64_t minp, +def roll_sum(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, sum_x = 0 + float64_t val, prev_x, sum_x = 0 int64_t s, e, range_endpoint int64_t nobs = 0, i, j, N bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -511,16 +517,18 @@ def roll_sum(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling mean -cdef inline double calc_mean(int64_t minp, Py_ssize_t nobs, - Py_ssize_t neg_ct, double sum_x) nogil: - cdef double result +cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, + Py_ssize_t neg_ct, float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: - result = sum_x / nobs + result = sum_x / nobs if neg_ct == 0 and result < 0: # all positive result = 0 @@ -534,7 +542,7 @@ cdef inline double calc_mean(int64_t minp, Py_ssize_t nobs, return result -cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ add a value from the mean calc """ @@ -546,7 +554,7 @@ cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] + 1 -cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ remove a value from the mean calc """ @@ -557,15 +565,15 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_mean(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, result, 
sum_x = 0 + float64_t val, prev_x, result, sum_x = 0 int64_t s, e bint is_variable Py_ssize_t nobs = 0, i, j, neg_ct = 0, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -627,13 +635,15 @@ def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling variance -cdef inline double calc_var(int64_t minp, int ddof, double nobs, - double ssqdm_x) nogil: - cdef double result +cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, + float64_t ssqdm_x) nogil: + cdef: + float64_t result # Variance is unchanged if no observation is added or removed if (nobs >= minp) and (nobs > ddof): @@ -642,7 +652,7 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, if nobs == 1: result = 0 else: - result = ssqdm_x / (nobs - ddof) + result = ssqdm_x / (nobs - ddof) if result < 0: result = 0 else: @@ -651,10 +661,12 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, return result -cdef inline void add_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ add a value from the var calc """ - cdef double delta + cdef: + float64_t delta + # `isnan` instead of equality as fix for GH-21813, msvc 2017 bug if isnan(val): return @@ -667,10 +679,11 @@ cdef inline void add_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0] -cdef inline void remove_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ remove a value from the var calc """ - cdef double delta + cdef: + float64_t delta if notnan(val): nobs[0] = nobs[0] 
- 1 @@ -685,18 +698,19 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = 0 -def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_var(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed, int ddof=1): """ Numerically stable implementation using Welford's method. """ cdef: - double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old + float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, + float64_t val, prev, delta, mean_x_old int64_t s, e bint is_variable Py_ssize_t i, j, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -785,13 +799,15 @@ def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, # ---------------------------------------------------------------------- # Rolling skewness -cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, - double xxx) nogil: - cdef double result, dnobs - cdef double A, B, C, R +cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, R if nobs >= minp: - dnobs = nobs + dnobs = nobs A = x / dnobs B = xx / dnobs - A * A C = xxx / dnobs - A * A * A - 3 * A * B @@ -817,8 +833,9 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void add_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ add a value from the skew calc """ # Not NaN @@ -831,8 +848,9 @@ cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] + val * val * val -cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void 
remove_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ remove a value from the skew calc """ # Not NaN @@ -845,16 +863,16 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] - val * val * val -def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_skew(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -915,17 +933,20 @@ def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling kurtosis -cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, - double xxx, double xxxx) nogil: - cdef double result, dnobs - cdef double A, B, C, D, R, K +cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, D, R, K if nobs >= minp: - dnobs = nobs + dnobs = nobs A = x / dnobs R = A * A B = xx / dnobs - R @@ -954,8 +975,9 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void add_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ add a value from the kurotic calc """ # Not NaN @@ -969,8 +991,9 @@ cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] + val * val * val * val -cdef inline 
void remove_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void remove_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ remove a value from the kurotic calc """ # Not NaN @@ -984,16 +1007,16 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] - val * val * val * val -def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_kurt(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0, xxxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -1050,6 +1073,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling median, min, max @@ -1057,7 +1081,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, res, prev + float64_t val, res, prev bint err = 0, is_variable int ret = 0 skiplist_t *sl @@ -1065,7 +1089,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, int64_t nobs = 0, N, s, e int midpoint ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs @@ -1130,6 +1154,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, raise MemoryError("skiplist_insert failed") return output + # 
---------------------------------------------------------------------- # Moving maximum / minimum code taken from Bottleneck under the terms @@ -1167,7 +1192,8 @@ cdef inline void remove_mm(numeric aold, Py_ssize_t *nobs) nogil: cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, numeric value) nogil: - cdef numeric result + cdef: + numeric result if numeric in cython.floating: if nobs >= minp: @@ -1252,7 +1278,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, Py_ssize_t nobs = 0 deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) Q = deque[int64_t]() @@ -1335,7 +1361,7 @@ cdef _roll_min_max_fixed(ndarray[numeric] values, numeric* minvalue numeric* end numeric* last - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) # setup the rings of death! @@ -1427,19 +1453,19 @@ interpolation_types = { def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, int64_t minp, object index, object closed, - double quantile, str interpolation): + float64_t quantile, str interpolation): """ O(N log(window)) implementation using skip list """ cdef: - double val, prev, midpoint, idx_with_fraction + float64_t val, prev, midpoint, idx_with_fraction skiplist_t *skiplist int64_t nobs = 0, i, j, s, e, N Py_ssize_t idx bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output - double vlow, vhigh + ndarray[float64_t] output + float64_t vlow, vhigh InterpolationType interpolation_type int ret = 0 @@ -1529,7 +1555,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, elif interpolation_type == MIDPOINT: vlow = skiplist_get(skiplist, idx, &ret) vhigh = skiplist_get(skiplist, idx + 1, &ret) - output[i] = (vlow + vhigh) / 2 + output[i] = (vlow + vhigh) / 2 else: output[i] = NaN @@ -1543,7 +1569,7 @@ def roll_generic(object obj, 
int offset, object func, bint raw, object args, object kwargs): cdef: - ndarray[double_t] output, counts, bufarr + ndarray[float64_t] output, counts, bufarr ndarray[float64_t, cast=True] arr float64_t *buf float64_t *oldbuf @@ -1642,7 +1668,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, Assume len(weights) << len(values) """ cdef: - ndarray[double_t] output, tot_wgt, counts + ndarray[float64_t] output, tot_wgt, counts Py_ssize_t in_i, win_i, win_n, win_k, in_n, in_k float64_t val_in, val_win, c, w @@ -1703,7 +1729,8 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, # Exponentially weighted moving average -def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): +def ewma(float64_t[:] vals, float64_t com, + int adjust, int ignore_na, int minp): """ Compute exponentially-weighted moving average using center-of-mass. @@ -1722,8 +1749,8 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): cdef: Py_ssize_t N = len(vals) - ndarray[double_t] output = np.empty(N, dtype=float) - double alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + ndarray[float64_t] output = np.empty(N, dtype=float) + float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur Py_ssize_t i, nobs if N == 0: @@ -1767,12 +1794,13 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): return output + # ---------------------------------------------------------------------- # Exponentially weighted moving covariance -def ewmcov(double_t[:] input_x, double_t[:] input_y, - double_t com, int adjust, int ignore_na, int minp, int bias): +def ewmcov(float64_t[:] input_x, float64_t[:] input_y, + float64_t com, int adjust, int ignore_na, int minp, int bias): """ Compute exponentially-weighted moving variance using center-of-mass. 
@@ -1793,10 +1821,10 @@ def ewmcov(double_t[:] input_x, double_t[:] input_y, cdef: Py_ssize_t N = len(input_x) - double alpha, old_wt_factor, new_wt, mean_x, mean_y, cov - double sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y Py_ssize_t i, nobs - ndarray[double_t] output + ndarray[float64_t] output if len(input_y) != N: raise ValueError("arrays are of different lengths " From af2787008c0eb8ad98283170387d7cc6b07aa188 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Nov 2018 13:10:43 -0800 Subject: [PATCH 02/13] make memoryview arguments const where needed; use C NAT lookups instead of python NaT lookups --- pandas/_libs/groupby.pyx | 24 +++++++-------- pandas/_libs/index.pyx | 6 ++-- pandas/_libs/lib.pyx | 23 +++++++-------- pandas/_libs/missing.pyx | 16 +++++----- pandas/_libs/tslib.pyx | 29 +++++++++--------- pandas/_libs/tslibs/conversion.pyx | 8 ++--- pandas/_libs/tslibs/nattype.pxd | 13 +++++++++ pandas/_libs/tslibs/nattype.pyx | 42 +++++++++++++++------------ pandas/_libs/tslibs/period.pyx | 22 +++++++------- pandas/_libs/tslibs/timedeltas.pyx | 34 +++++++++++----------- pandas/_libs/tslibs/timestamps.pyx | 17 +++++------ pandas/tests/series/test_internals.py | 29 +++++++++--------- pandas/tests/series/test_replace.py | 4 +-- 13 files changed, 138 insertions(+), 129 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index bc15eee04d42d..e0c93e11fc461 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from libc.stdlib cimport malloc, free @@ -108,7 +108,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, cdef: Py_ssize_t i, j, N, K, ngroups, size ndarray[int64_t] _counts - ndarray data + ndarray[float64_t, ndim=2] data 
float64_t* ptr assert min_count == -1, "'min_count' only used in add and prod" @@ -138,8 +138,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) def group_cumprod_float64(float64_t[:, :] out, - float64_t[:, :] values, - int64_t[:] labels, + const float64_t[:, :] values, + const int64_t[:] labels, bint is_datetimelike, bint skipna=True): """ @@ -176,7 +176,7 @@ def group_cumprod_float64(float64_t[:, :] out, @cython.wraparound(False) def group_cumsum(numeric[:, :] out, numeric[:, :] values, - int64_t[:] labels, + const int64_t[:] labels, is_datetimelike, bint skipna=True): """ @@ -216,7 +216,7 @@ def group_cumsum(numeric[:, :] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, +def group_shift_indexer(int64_t[:] out, const int64_t[:] labels, int ngroups, int periods): cdef: Py_ssize_t N, i, j, ii @@ -290,7 +290,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, """ cdef: Py_ssize_t i, N - ndarray[int64_t] sorted_labels + int64_t[:] sorted_labels int64_t idx, curr_fill_idx=-1, filled_vals=0 N = len(out) @@ -326,10 +326,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, @cython.boundscheck(False) @cython.wraparound(False) -def group_any_all(ndarray[uint8_t] out, - ndarray[int64_t] labels, - ndarray[uint8_t] values, - ndarray[uint8_t] mask, +def group_any_all(uint8_t[:] out, + const int64_t[:] labels, + const uint8_t[:] values, + const uint8_t[:] mask, object val_test, bint skipna): """Aggregated boolean values to show truthfulness of group elements diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index d418ac63a4ac8..7930f583274b5 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -25,7 +25,7 @@ from pandas._libs import algos, hashtable as _hash from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib from pandas._libs.missing 
import checknull -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() cdef inline bint is_definitely_invalid_key(object val): @@ -520,7 +520,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, (datetime, np.datetime64, date)): return Timestamp(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timestamp(value).value raise ValueError("cannot set a Timestamp with a non-timestamp") @@ -531,7 +531,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, timedelta): return Timedelta(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timedelta(value).value raise ValueError("cannot set a Timedelta with a non-timedelta") diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 8265e193fbec0..ec41a469e29a8 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -52,8 +52,8 @@ from util cimport (is_nan, UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN) from tslib import array_to_datetime -from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject +from tslibs.nattype cimport NAT, NPY_NAT from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone, tz_compare @@ -67,9 +67,6 @@ cdef object oINT64_MAX = INT64_MAX cdef object oINT64_MIN = INT64_MIN cdef object oUINT64_MAX = UINT64_MAX -cdef int64_t NPY_NAT = util.get_nat() -iNaT = util.get_nat() - cdef bint PY2 = sys.version_info[0] == 2 cdef float64_t nan = np.NaN @@ -104,7 +101,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t: # ---------------------------------------------------------------------- -def is_scalar(val: object) -> bint: +def is_scalar(val: object) -> bool: """ Return True if given value is scalar. 
@@ -628,7 +625,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, nat_count = 0 if hasnans: - mask = values == iNaT + mask = values == NPY_NAT nat_count = np.sum(mask) values = values[~mask] @@ -1206,7 +1203,7 @@ def infer_dtype(value: object, skipna: bool=False) -> str: # np.datetime64('nat') and np.timedelta64('nat') if val is None or util.is_nan(val): pass - elif val is NaT: + elif val is NAT: seen_pdnat = True else: seen_val = True @@ -1335,7 +1332,7 @@ def infer_datetimelike_array(arr: object) -> object: elif v is None or util.is_nan(v): # nan or None pass - elif v is NaT: + elif v is NAT: seen_nat = 1 elif PyDateTime_Check(v): # datetime @@ -1647,12 +1644,12 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: for i in range(n): base_val = values[i] - if base_val is not NaT: + if base_val is not NAT: base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) for j in range(i, n): val = values[j] - if val is not NaT: + if val is not NAT: tz = getattr(val, 'tzinfo', None) if not tz_compare(base_tz, tz): return False @@ -1965,12 +1962,12 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if val is None: seen.null_ = 1 floats[i] = complexes[i] = fnan - elif val is NaT: + elif val is NAT: if convert_datetime: - idatetimes[i] = iNaT + idatetimes[i] = NPY_NAT seen.datetime_ = 1 if convert_timedelta: - itimedeltas[i] = iNaT + itimedeltas[i] = NPY_NAT seen.timedelta_ = 1 if not (convert_datetime or convert_timedelta): seen.object_ = 1 diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 30c3a5ca541ae..083cb71e0742a 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -11,7 +11,7 @@ cnp.import_array() cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value -from tslibs.nattype import NaT +from tslibs.nattype cimport NAT cdef float64_t INF = np.inf cdef float64_t NEGINF = -INF @@ -25,7 +25,7 @@ cdef inline bint _check_all_nulls(object val): if 
isinstance(val, (float, complex)): res = val != val - elif val is NaT: + elif val is NAT: res = 1 elif val is None: res = 1 @@ -65,7 +65,7 @@ cpdef bint checknull(object val): return val != val # and val != INF and val != NEGINF elif util.is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT - elif val is NaT: + elif val is NAT: return True elif util.is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT @@ -104,7 +104,7 @@ cpdef bint checknull_old(object val): return val != val or val == INF or val == NEGINF elif util.is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT - elif val is NaT: + elif val is NAT: return True elif util.is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT @@ -188,7 +188,7 @@ def isnaobj_old(ndarray arr): result = np.zeros(n, dtype=np.uint8) for i in range(n): val = arr[i] - result[i] = val is NaT or _check_none_nan_inf_neginf(val) + result[i] = val is NAT or _check_none_nan_inf_neginf(val) return result.view(np.bool_) @@ -297,7 +297,7 @@ cdef inline bint is_null_datetime64(v): # excluding np.timedelta64('nat') if v is None or util.is_nan(v): return True - elif v is NaT: + elif v is NAT: return True elif util.is_datetime64_object(v): return v.view('int64') == NPY_NAT @@ -309,7 +309,7 @@ cdef inline bint is_null_timedelta64(v): # excluding np.datetime64('nat') if v is None or util.is_nan(v): return True - elif v is NaT: + elif v is NAT: return True elif util.is_timedelta64_object(v): return v.view('int64') == NPY_NAT @@ -321,6 +321,6 @@ cdef inline bint is_null_period(v): # excluding np.datetime64('nat') and np.timedelta64('nat') if v is None or util.is_nan(v): return True - elif v is NaT: + elif v is NAT: return True return False diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9012ebefe0975..e50e4011ef62f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -37,8 +37,9 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject, 
get_datetime64_nanos, tz_convert_utc_to_tzlocal) -from tslibs.nattype import NaT, nat_strings, iNaT -from tslibs.nattype cimport checknull_with_nat, NPY_NAT +# _many_ modules still look for NaT and iNaT here despite them not being needed +from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821 +from tslibs.nattype cimport checknull_with_nat, NPY_NAT, NAT from tslibs.offsets cimport to_offset @@ -125,7 +126,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NaT + result[i] = NAT else: dt64_to_dtstruct(value, &dts) result[i] = func_create(value, dts, tz, freq) @@ -133,7 +134,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NaT + result[i] = NAT else: # Python datetime objects do not support nanosecond # resolution (yet, PEP 564). Need to compute new value @@ -150,7 +151,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NaT + result[i] = NAT else: # Adjust datetime64 timestamp, recompute datetimestruct dt64_to_dtstruct(value + delta, &dts) @@ -162,7 +163,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NaT + result[i] = NAT else: # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 @@ -173,7 +174,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NaT + result[i] = NAT else: # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 @@ -335,7 +336,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): # then need to iterate try: iresult = 
values.astype('i8', casting='same_kind', copy=False) - mask = iresult == iNaT + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False @@ -351,7 +352,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): "'{unit}'".format(unit=unit)) result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') - iresult[mask] = iNaT + iresult[mask] = NPY_NAT return result result = np.empty(n, dtype='M8[ns]') @@ -428,11 +429,11 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): val = values[i] if checknull_with_nat(val): - oresult[i] = NaT + oresult[i] = NAT elif is_integer_object(val) or is_float_object(val): if val != val or val == NPY_NAT: - oresult[i] = NaT + oresult[i] = NAT else: try: oresult[i] = Timestamp(cast_from_unit(val, unit)) @@ -441,7 +442,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): elif is_string_object(val): if len(val) == 0 or val in nat_strings: - oresult[i] = NaT + oresult[i] = NAT else: oresult[i] = val @@ -739,10 +740,10 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if isinstance(val, float): oresult[i] = np.nan else: - oresult[i] = NaT + oresult[i] = NAT elif is_datetime64_object(val): if get_datetime64_value(val) == NPY_NAT: - oresult[i] = NaT + oresult[i] = NAT else: oresult[i] = val.item() else: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f88671b41a16a..6120543b74816 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -35,8 +35,8 @@ from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, get_timezone, maybe_get_tz, tz_compare) from parsing import parse_datetime_string -from nattype import nat_strings, NaT -from nattype cimport NPY_NAT, checknull_with_nat +from nattype import nat_strings +from nattype cimport NPY_NAT, checknull_with_nat, NAT # ---------------------------------------------------------------------- # 
Constants @@ -277,7 +277,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, if is_string_object(ts): return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) - if ts is None or ts is NaT: + if ts is None or ts is NAT: obj.value = NPY_NAT elif is_datetime64_object(ts): if ts.view('i8') == NPY_NAT: @@ -425,7 +425,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, assert is_string_object(ts) if len(ts) == 0 or ts in nat_strings: - ts = NaT + ts = NAT elif ts == 'now': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns utc diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index 382ac9d323918..dc3bb5220edd9 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -7,3 +7,16 @@ cdef bint _nat_scalar_rules[6] cdef bint checknull_with_nat(object val) cdef bint is_null_datetimelike(object val) + +from cpython.datetime cimport datetime + + +cdef class _NaT(datetime): + cdef readonly: + int64_t value + object freq + +# By declaring NAT in the .pxd file and cimporting it into other cython +# modules, we make `if thing is NaT` into a C pointer lookup and avoid a +# runtime lookup of NaT in the module namespace. 
+cdef _NaT NAT diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index a010cbf76cf5d..4320e03a9e143 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -47,7 +47,7 @@ def _make_nan_func(func_name, doc): def _make_nat_func(func_name, doc): def f(*args, **kwargs): - return NaT + return NAT f.__name__ = func_name f.__doc__ = doc return f @@ -67,10 +67,10 @@ def _make_error_func(func_name, cls): cdef _nat_divide_op(self, other): - if PyDelta_Check(other) or is_timedelta64_object(other) or other is NaT: + if PyDelta_Check(other) or is_timedelta64_object(other) or other is NAT: return np.nan if is_integer_object(other) or is_float_object(other): - return NaT + return NAT return NotImplemented @@ -82,15 +82,16 @@ cdef _nat_rdivide_op(self, other): def __nat_unpickle(*args): # return constant defined in the module - return NaT + return NAT # ---------------------------------------------------------------------- cdef class _NaT(datetime): - cdef readonly: - int64_t value - object freq + # Actual declarations are in the accompanying .pxd file + # cdef readonly: + # int64_t value + # object freq def __hash__(_NaT self): # py3k needs this defined here @@ -116,26 +117,26 @@ cdef class _NaT(datetime): def __add__(self, other): if PyDateTime_Check(other): - return NaT + return NAT elif hasattr(other, 'delta'): # Timedelta, offsets.Tick, offsets.Week - return NaT + return NAT elif getattr(other, '_typ', None) in ['dateoffset', 'series', 'period', 'datetimeindex', 'timedeltaindex']: # Duplicate logic in _Timestamp.__add__ to avoid needing # to subclass; allows us to @final(_Timestamp.__add__) return NotImplemented - return NaT + return NAT def __sub__(self, other): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) if PyDateTime_Check(other): - return NaT + return NAT elif PyDelta_Check(other): - return NaT + return NAT elif getattr(other, '_typ', None) 
== 'datetimeindex': # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex @@ -154,13 +155,13 @@ cdef class _NaT(datetime): 'periodindex', 'dateoffset']: return NotImplemented - return NaT + return NAT def __pos__(self): - return NaT + return NAT def __neg__(self): - return NaT + return NAT def __div__(self, other): return _nat_divide_op(self, other) @@ -173,7 +174,7 @@ cdef class _NaT(datetime): def __mul__(self, other): if is_integer_object(other) or is_float_object(other): - return NaT + return NAT return NotImplemented @property @@ -271,7 +272,7 @@ class NaTType(_NaT): def __rmul__(self, other): if is_integer_object(other) or is_float_object(other): - return NaT + return NAT return NotImplemented # ---------------------------------------------------------------------- @@ -660,13 +661,16 @@ class NaTType(_NaT): NaT = NaTType() +# NAT is a C alias for NaT that can be checked for "if thing is NAT" without +# having to do a module-level lookup for NaT. +cdef _NaT NAT = NaT # ---------------------------------------------------------------------- cdef inline bint checknull_with_nat(object val): """ utility to check if a value is a nat or not """ - return val is None or util.is_nan(val) or val is NaT + return val is None or util.is_nan(val) or val is NAT cdef inline bint is_null_datetimelike(object val): @@ -683,7 +687,7 @@ cdef inline bint is_null_datetimelike(object val): """ if val is None or util.is_nan(val): return True - elif val is NaT: + elif val is NAT: return True elif util.is_timedelta64_object(val): return val.view('int64') == NPY_NAT diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index ebcbea0ee30b3..34bfaf0b31bce 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -46,8 +46,8 @@ from frequencies cimport (get_freq_code, get_base_alias, get_rule_month) from parsing import parse_time_string from resolution import Resolution -from nattype import nat_strings, NaT -from nattype cimport 
_nat_scalar_rules, NPY_NAT, is_null_datetimelike +from nattype import nat_strings +from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike, NAT from offsets cimport to_offset from offsets import _Tick @@ -1194,7 +1194,7 @@ def period_format(int64_t value, int freq, object fmt=None): int freq_group if value == NPY_NAT: - return repr(NaT) + return repr(NAT) if fmt is None: freq_group = get_freq_group(freq) @@ -1459,7 +1459,7 @@ def extract_ordinals(object[:] values, freq): except AttributeError: p = Period(p, freq=freq) - if p is NaT: + if p is NAT: # input may contain NaT-like string ordinals[i] = NPY_NAT else: @@ -1587,7 +1587,7 @@ cdef class _Period(object): Fast creation from an ordinal and freq that are already validated! """ if ordinal == NPY_NAT: - return NaT + return NAT else: freq = cls._maybe_convert_freq(freq) self = _Period.__new__(cls, ordinal, freq) @@ -1599,7 +1599,7 @@ cdef class _Period(object): msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) - elif other is NaT: + elif other is NAT: return _nat_scalar_rules[op] # index/series like elif hasattr(other, '_typ'): @@ -1647,8 +1647,8 @@ cdef class _Period(object): if (PyDelta_Check(other) or util.is_timedelta64_object(other) or util.is_offset_object(other)): return self._add_delta(other) - elif other is NaT: - return NaT + elif other is NAT: + return NAT elif util.is_integer_object(other): maybe_integer_op_deprecated(self) @@ -1695,8 +1695,8 @@ cdef class _Period(object): else: # pragma: no cover return NotImplemented elif is_period_object(other): - if self is NaT: - return NaT + if self is NAT: + return NAT return NotImplemented else: return NotImplemented @@ -2458,7 +2458,7 @@ class Period(_Period): value = str(value) value = value.upper() dt, _, reso = parse_time_string(value, freq) - if dt is NaT: + if dt is NAT: ordinal = NPY_NAT if freq is None: diff --git 
a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c09a8e5b395ee..13f4dc7cadf48 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -31,8 +31,8 @@ from util cimport (is_timedelta64_object, is_datetime64_object, from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) -from nattype import nat_strings, NaT -from nattype cimport checknull_with_nat, NPY_NAT +from nattype import nat_strings +from nattype cimport checknull_with_nat, NPY_NAT, NAT from offsets cimport to_offset # ---------------------------------------------------------------------- @@ -119,7 +119,7 @@ def ints_to_pytimedelta(int64_t[:] arr, box=False): value = arr[i] if value == NPY_NAT: - result[i] = NaT + result[i] = NAT else: if box: result[i] = Timedelta(value) @@ -568,8 +568,8 @@ def _binary_op_method_timedeltalike(op, name): return op(self, other.delta) return NotImplemented - elif other is NaT: - return NaT + elif other is NAT: + return NAT elif is_timedelta64_object(other): # convert to Timedelta below; avoid catching this in @@ -603,9 +603,9 @@ def _binary_op_method_timedeltalike(op, name): # failed to parse as timedelta return NotImplemented - if other is NaT: + if other is NAT: # e.g. if original other was timedelta64('NaT') - return NaT + return NAT return Timedelta(op(self.value, other.value), unit='ns') f.__name__ = name @@ -1171,7 +1171,7 @@ class Timedelta(_Timedelta): unit = parse_timedelta_unit(unit) value = convert_to_timedelta64(value, unit) elif checknull_with_nat(value): - return NaT + return NAT else: raise ValueError( "Value must be Timedelta, string, integer, " @@ -1182,7 +1182,7 @@ class Timedelta(_Timedelta): # nat if value == NPY_NAT: - return NaT + return NAT # make timedelta happy td_base = _Timedelta.__new__(cls, microseconds=int(value) / 1000) @@ -1270,13 +1270,13 @@ class Timedelta(_Timedelta): # i.e. 
np.nan, but also catch np.float64("NaN") which would # otherwise get caught by the hasattr(other, "dtype") branch # incorrectly return a np.timedelta64 object. - return NaT + return NAT elif hasattr(other, 'dtype'): # ndarray-like return other * self.to_timedelta64() - elif other is NaT: + elif other is NAT: raise TypeError('Cannot multiply Timedelta with NaT') elif not (is_integer_object(other) or is_float_object(other)): @@ -1303,7 +1303,7 @@ class Timedelta(_Timedelta): # i.e. np.nan, but also catch np.float64("NaN") which would # otherwise get caught by the hasattr(other, "dtype") branch # incorrectly return a np.timedelta64 object. - return NaT + return NAT elif hasattr(other, 'dtype'): return self.to_timedelta64() / other @@ -1316,7 +1316,7 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NaT: + if other is NAT: return np.nan return self.value / float(other.value) @@ -1339,8 +1339,8 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NaT: - return NaT + if other is NAT: + return NAT return float(other.value) / self.value if not PY3: @@ -1382,7 +1382,7 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NaT: + if other is NAT: return np.nan return self.value // other.value @@ -1428,7 +1428,7 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NaT: + if other is NAT: return np.nan return other.value // self.value diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d5bd2e90af3a7..2724c53dbaee6 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import enum import warnings from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, @@ -22,10 +23,8 @@ cimport ccalendar from conversion import tz_localize_to_utc, normalize_i8_timestamps from conversion cimport 
(tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) -import enum from fields import get_start_end_field, get_date_name_field -from nattype import NaT -from nattype cimport NPY_NAT +from nattype cimport NPY_NAT, NAT from np_datetime import OutOfBoundsDatetime from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct) @@ -204,7 +203,7 @@ cdef class _Timestamp(datetime): if isinstance(other, _Timestamp): ots = other - elif other is NaT: + elif other is NAT: return op == Py_NE elif PyDateTime_Check(other): if self.nanosecond == 0: @@ -339,9 +338,9 @@ cdef class _Timestamp(datetime): elif is_integer_object(other): maybe_integer_op_deprecated(self) - if self is NaT: + if self is NAT: # to be compat with Period - return NaT + return NAT elif self.freq is None: raise ValueError("Cannot add integral value to Timestamp " "without freq.") @@ -383,8 +382,8 @@ cdef class _Timestamp(datetime): elif getattr(other, '_typ', None) == 'timedeltaindex': return (-other).__add__(self) - elif other is NaT: - return NaT + elif other is NAT: + return NAT # coerce if necessary if we are a Timestamp-like if (PyDateTime_Check(self) @@ -730,7 +729,7 @@ class Timestamp(_Timestamp): ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) if ts.value == NPY_NAT: - return NaT + return NAT if is_string_object(freq): freq = to_offset(freq) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 8e3b0d19447a1..e1e9d5c186e4a 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -4,11 +4,8 @@ from datetime import datetime import numpy as np -from numpy import nan import pytest -import pandas._libs.lib as lib - import pandas as pd from pandas import Series from pandas.core.indexes.datetimes import Timestamp @@ -87,7 +84,7 @@ def test_convert_objects(self): expected = Series([Timestamp('20010101'), Timestamp('20010102'), 
Timestamp('20010103'), - lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), + pd.NaT, pd.NaT, pd.NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') with tm.assert_produces_warning(FutureWarning): result = s2.convert_objects(convert_dates='coerce', @@ -103,7 +100,7 @@ def test_convert_objects(self): with tm.assert_produces_warning(FutureWarning): result = s.convert_objects(convert_dates='coerce', convert_numeric=False) - expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2) + expected = Series([pd.NaT] * 2 + [Timestamp(1)] * 2) assert_series_equal(result, expected) # preserver if non-object @@ -149,14 +146,14 @@ def test_convert(self): # Test coercion returns correct type s = Series(['a', 'b', 'c']) results = s._convert(datetime=True, coerce=True) - expected = Series([lib.NaT] * 3) + expected = Series([pd.NaT] * 3) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) expected = Series([np.nan] * 3) assert_series_equal(results, expected) - expected = Series([lib.NaT] * 3, dtype=np.dtype('m8[ns]')) + expected = Series([pd.NaT] * 3, dtype=np.dtype('m8[ns]')) results = s._convert(timedelta=True, coerce=True) assert_series_equal(results, expected) @@ -166,15 +163,15 @@ def test_convert(self): # Test coercion with mixed types s = Series(['a', '3.1415', dt, td]) results = s._convert(datetime=True, coerce=True) - expected = Series([lib.NaT, lib.NaT, dt, lib.NaT]) + expected = Series([pd.NaT, pd.NaT, dt, pd.NaT]) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) - expected = Series([nan, 3.1415, nan, nan]) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True, coerce=True) - expected = Series([lib.NaT, lib.NaT, lib.NaT, td], + expected = Series([pd.NaT, pd.NaT, pd.NaT, td], dtype=np.dtype('m8[ns]')) assert_series_equal(results, expected) @@ -182,7 +179,7 @@ def test_convert(self): results = s._convert(datetime=True) 
assert_series_equal(results, s) results = s._convert(numeric=True) - expected = Series([nan, 3.1415, nan, nan]) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True) assert_series_equal(results, s) @@ -231,13 +228,13 @@ def test_convert(self): r['a'] = 'garbled' result = r._convert(numeric=True) expected = s.copy() - expected['a'] = nan + expected['a'] = np.nan assert_series_equal(result, expected) # GH 4119, not converting a mixed type (e.g.floats and object) s = Series([1, 'na', 3, 4]) result = s._convert(datetime=True, numeric=True) - expected = Series([1, nan, 3, 4]) + expected = Series([1, np.nan, 3, 4]) assert_series_equal(result, expected) s = Series([1, '', 3, 4]) @@ -260,7 +257,7 @@ def test_convert(self): assert_series_equal(result, expected) expected = Series([Timestamp('20010101'), Timestamp('20010102'), - Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT, + Timestamp('20010103'), pd.NaT, pd.NaT, pd.NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') result = s2._convert(datetime=True, numeric=False, timedelta=False, @@ -271,7 +268,7 @@ def test_convert(self): s = Series(['foo', 'bar', 1, 1.0], dtype='O') result = s._convert(datetime=True, coerce=True) - expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2) + expected = Series([pd.NaT] * 2 + [Timestamp(1)] * 2) assert_series_equal(result, expected) # preserver if non-object @@ -285,7 +282,7 @@ def test_convert(self): # assert result.dtype == 'M8[ns]' # dateutil parses some single letters into today's value as a date - expected = Series([lib.NaT]) + expected = Series([pd.NaT]) for x in 'abcdefghijklmnopqrstuvwxyz': s = Series([x]) result = s._convert(datetime=True, coerce=True) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 67c75f43e030c..7efde1fbdd1f5 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -4,8 +4,6 @@ import numpy 
as np import pytest -import pandas._libs.lib as lib - import pandas as pd import pandas.util.testing as tm @@ -65,7 +63,7 @@ def test_replace(self): ser = pd.Series([np.nan, 0, np.inf]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) - ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT]) + ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, pd.NaT]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) filled = ser.copy() filled[4] = 0 From 367969e3dab9d5530eb7023d96d64e58e9a27acf Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Nov 2018 17:15:54 -0800 Subject: [PATCH 03/13] remove boundschecks --- pandas/_libs/tslib.pyx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e50e4011ef62f..8e27acc127d48 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import cython from cython import Py_ssize_t from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, @@ -72,6 +73,8 @@ cdef inline object create_time_from_ts( return time(dts.hour, dts.min, dts.sec, dts.us, tz) +@cython.wraparound(False) +@cython.boundscheck(False) def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -214,6 +217,8 @@ def _test_parse_iso8601(object ts): return Timestamp(obj.value) +@cython.wraparound(False) +@cython.boundscheck(False) def format_array_from_datetime(ndarray[int64_t] values, object tz=None, object format=None, object na_rep=None): """ @@ -450,6 +455,8 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): return oresult +@cython.wraparound(False) +@cython.boundscheck(False) cpdef array_to_datetime(ndarray[object] values, errors='raise', dayfirst=False, yearfirst=False, format=None, utc=None, @@ -753,6 +760,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return 
array_to_datetime_object(values, is_raise, dayfirst, yearfirst) +@cython.wraparound(False) +@cython.boundscheck(False) cdef array_to_datetime_object(ndarray[object] values, bint is_raise, dayfirst=False, yearfirst=False): """ From 4f4d1cffe23198d4244947c5ee09d911b748c180 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Nov 2018 19:30:03 -0800 Subject: [PATCH 04/13] de-duplicate using checknull_with_nat --- pandas/_libs/missing.pyx | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 083cb71e0742a..7c6cae4887f5f 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -11,7 +11,7 @@ cnp.import_array() cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value -from tslibs.nattype cimport NAT +from tslibs.nattype cimport NAT, checknull_with_nat cdef float64_t INF = np.inf cdef float64_t NEGINF = -INF @@ -295,9 +295,7 @@ def isneginf_scalar(val: object) -> bool: cdef inline bint is_null_datetime64(v): # determine if we have a null for a datetime (or integer versions), # excluding np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NAT: + if checknull_with_nat(v): return True elif util.is_datetime64_object(v): return v.view('int64') == NPY_NAT @@ -307,9 +305,7 @@ cdef inline bint is_null_datetime64(v): cdef inline bint is_null_timedelta64(v): # determine if we have a null for a timedelta (or integer versions), # excluding np.datetime64('nat') - if v is None or util.is_nan(v): - return True - elif v is NAT: + if checknull_with_nat(v): return True elif util.is_timedelta64_object(v): return v.view('int64') == NPY_NAT @@ -319,8 +315,4 @@ cdef inline bint is_null_timedelta64(v): cdef inline bint is_null_period(v): # determine if we have a null for a Period (or integer versions), # excluding np.datetime64('nat') and np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NAT: - 
return True - return False + return checknull_with_nat(v) From d1511f7001d7251bb1820a4b35ed8260dbcdb33a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Nov 2018 21:00:31 -0800 Subject: [PATCH 05/13] remove non-standard imports of np.nan --- pandas/tests/arrays/sparse/test_array.py | 4 ++-- pandas/tests/frame/test_operators.py | 3 +-- pandas/tests/frame/test_repr_info.py | 5 ++--- pandas/tests/frame/test_timeseries.py | 7 +++---- pandas/tests/frame/test_to_csv.py | 3 +-- pandas/tests/groupby/aggregate/test_cython.py | 5 ++--- pandas/tests/series/test_operators.py | 9 ++++----- 7 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 852c4fb910560..32e687c4a958a 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -3,7 +3,6 @@ import warnings import numpy as np -from numpy import nan import pytest from pandas._libs.sparse import IntIndex @@ -24,7 +23,8 @@ def kind(request): class TestSparseArray(object): def setup_method(self, method): - self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) + self.arr_data = np.array([np.nan, np.nan, 1, 2, 3, + np.nan, 4, 5, np.nan, 6]) self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 65459735e639b..652370d5529c0 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan import numpy as np from pandas.compat import range @@ -328,7 +327,7 @@ def test_combineFrame(self): frame_copy = self.frame.reindex(self.frame.index[::2]) del frame_copy['D'] - frame_copy['C'][:5] = nan + frame_copy['C'][:5] = np.nan added = self.frame + frame_copy diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 
668613c494a47..01dee47fffe49 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -7,7 +7,6 @@ import sys import textwrap -from numpy import nan import numpy as np import pytest @@ -49,8 +48,8 @@ def test_repr_mixed_big(self): biggie = DataFrame({'A': np.random.randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan foo = repr(biggie) # noqa diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index eecbdc0130f02..1c354c25c4439 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan from numpy.random import randn import numpy as np @@ -517,8 +516,8 @@ def test_first_last_valid(self, data, idx, expected_first, expected_last): N = len(self.frame.index) mat = randn(N) - mat[:5] = nan - mat[-5:] = nan + mat[:5] = np.nan + mat[-5:] = np.nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() @@ -534,7 +533,7 @@ def test_first_last_valid(self, data, idx, assert empty.first_valid_index() is None # GH17400: no valid entries - frame[:] = nan + frame[:] = np.nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index aa91b7510a2b5..cf6a556ca58e9 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -6,7 +6,6 @@ import csv import pytest -from numpy import nan import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) @@ -52,7 +51,7 @@ def test_from_csv_deprecation(self): def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: - self.frame['A'][:5] = nan + self.frame['A'][:5] = np.nan self.frame.to_csv(path) self.frame.to_csv(path, columns=['A', 
'B']) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index d8a545b323674..578fc4ab42d6a 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -9,7 +9,6 @@ import pytest import numpy as np -from numpy import nan import pandas as pd from pandas import (bdate_range, DataFrame, Index, Series, Timestamp, @@ -36,11 +35,11 @@ 'max', ]) def test_cythonized_aggers(op_name): - data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], + data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., np.nan, np.nan], 'B': ['A', 'B'] * 6, 'C': np.random.randn(12)} df = DataFrame(data) - df.loc[2:10:2, 'C'] = nan + df.loc[2:10:2, 'C'] = np.nan op = lambda x: getattr(x, op_name)() diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 35bd99ff2eda8..df7966d8323e3 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -5,7 +5,6 @@ import operator import numpy as np -from numpy import nan import pytest import pandas.compat as compat @@ -750,12 +749,12 @@ def _check_fill(meth, op, a, b, fill_value=0): with np.errstate(all='ignore'): if amask[i]: if bmask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(fill_value, b[i])) elif bmask[i]: if amask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(a[i], fill_value)) else: @@ -765,8 +764,8 @@ def _check_fill(meth, op, a, b, fill_value=0): expected = Series(exp_values, exp_index) assert_series_equal(result, expected) - a = Series([nan, 1., 2., 3., nan], index=np.arange(5)) - b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6)) + a = Series([np.nan, 1., 2., 3., np.nan], index=np.arange(5)) + b = Series([np.nan, 1, np.nan, 3, np.nan, 4.], index=np.arange(6)) result = op(a, b) exp = equiv_op(a, b) From c9ef170289d418657499ddfd819b7b0146f3ebb9 Mon Sep 17 00:00:00 2001 From: Brock 
Mendel Date: Thu, 8 Nov 2018 08:00:35 -0800 Subject: [PATCH 06/13] revert not-worth it NAT, remove extraneous nan --- pandas/_libs/algos.pyx | 1 - pandas/_libs/algos_rank_helper.pxi.in | 4 +-- pandas/_libs/groupby.pyx | 1 - pandas/_libs/groupby_helper.pxi.in | 2 +- pandas/_libs/hashtable.pyx | 2 -- pandas/_libs/hashtable_class_helper.pxi.in | 2 +- pandas/_libs/join.pyx | 4 --- pandas/_libs/lib.pyx | 25 ++++++------- pandas/_libs/missing.pyx | 11 +++--- pandas/_libs/tslib.pyx | 24 ++++++------- pandas/_libs/tslibs/conversion.pyx | 8 ++--- pandas/_libs/tslibs/nattype.pxd | 13 ------- pandas/_libs/tslibs/nattype.pyx | 42 ++++++++++------------ pandas/_libs/tslibs/period.pyx | 22 ++++++------ pandas/_libs/tslibs/timedeltas.pyx | 34 +++++++++--------- pandas/_libs/tslibs/timestamps.pyx | 15 ++++---- 16 files changed, 94 insertions(+), 116 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index fc73be9a21e63..8567b7895dba3 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -32,7 +32,6 @@ import missing cdef float64_t FP_ERR = 1e-13 cdef float64_t NaN = np.NaN -cdef float64_t nan = NaN cdef int64_t iNaT = get_nat() diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index fcb052e8be63b..51b0c2966e361 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -149,7 +149,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{if dtype != 'uint64'}} isnan = sorted_mask[i] if isnan and keep_na: - ranks[argsorted[i]] = nan + ranks[argsorted[i]] = NaN continue {{endif}} @@ -317,7 +317,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{else}} if (val == nan_value) and keep_na: {{endif}} - ranks[i, argsorted[i, j]] = nan + ranks[i, argsorted[i, j]] = NaN {{if dtype == 'object'}} infs += 1 diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index e0c93e11fc461..4299459913e7e 100644 --- a/pandas/_libs/groupby.pyx +++ 
b/pandas/_libs/groupby.pyx @@ -22,7 +22,6 @@ from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers cdef int64_t iNaT = get_nat() cdef float64_t NaN = np.NaN -cdef float64_t nan = NaN cdef inline float64_t median_linear(float64_t* a, int n) nogil: diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 39e408660fd87..4aa69ce18905c 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -527,7 +527,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # to the result where appropriate if keep_na and mask[_as[i]]: for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = nan + out[_as[j], 0] = NaN grp_na_count = dups elif tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index c4dcd24142546..dea8c08759457 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -42,8 +42,6 @@ cimport util from missing cimport checknull -nan = np.nan - cdef int64_t iNaT = util.get_nat() _SIZE_HINT_LIMIT = (1 << 20) + 7 diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 36ed8a88aa78b..c90664e9e4628 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -251,7 +251,7 @@ cdef class HashTable: {{py: # name, dtype, float_group, default_na_value -dtypes = [('Float64', 'float64', True, 'nan'), +dtypes = [('Float64', 'float64', True, 'np.nan'), ('UInt64', 'uint64', False, 0), ('Int64', 'int64', False, 'iNaT')] diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 4a0944c6b1274..54dfeeff1452d 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -10,10 +10,6 @@ from numpy cimport (ndarray, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() - -cdef float64_t NaN = np.NaN -cdef float64_t nan = NaN - from pandas._libs.algos import groupsort_indexer, 
ensure_platform_int from pandas.core.algorithms import take_nd diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index ec41a469e29a8..cfc60256e97a3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -52,8 +52,9 @@ from util cimport (is_nan, UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN) from tslib import array_to_datetime +from tslibs.nattype cimport NPY_NAT +from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject -from tslibs.nattype cimport NAT, NPY_NAT from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone, tz_compare @@ -68,7 +69,7 @@ cdef object oINT64_MIN = INT64_MIN cdef object oUINT64_MAX = UINT64_MAX cdef bint PY2 = sys.version_info[0] == 2 -cdef float64_t nan = np.NaN +cdef float64_t NaN = np.NaN def values_from_object(obj: object): @@ -1203,7 +1204,7 @@ def infer_dtype(value: object, skipna: bool=False) -> str: # np.datetime64('nat') and np.timedelta64('nat') if val is None or util.is_nan(val): pass - elif val is NAT: + elif val is NaT: seen_pdnat = True else: seen_val = True @@ -1332,7 +1333,7 @@ def infer_datetimelike_array(arr: object) -> object: elif v is None or util.is_nan(v): # nan or None pass - elif v is NAT: + elif v is NaT: seen_nat = 1 elif PyDateTime_Check(v): # datetime @@ -1644,12 +1645,12 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: for i in range(n): base_val = values[i] - if base_val is not NAT: + if base_val is not NaT: base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) for j in range(i, n): val = values[j] - if val is not NAT: + if val is not NaT: tz = getattr(val, 'tzinfo', None) if not tz_compare(base_tz, tz): return False @@ -1813,7 +1814,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if val.__hash__ is not None and val in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif util.is_float_object(val): fval = val if fval != fval: @@ -1844,11 +1845,11 @@ 
def maybe_convert_numeric(ndarray[object] values, set na_values, seen.bool_ = True elif val is None: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif hasattr(val, '__len__') and len(val) == 0: if convert_empty or seen.coerce_numeric: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: raise ValueError('Empty string encountered') elif util.is_complex_object(val): @@ -1863,7 +1864,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if fval in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: if fval != fval: seen.null_ = True @@ -1896,7 +1897,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, elif "uint64" in str(e): # Exception from check functions. raise seen.saw_null() - floats[i] = nan + floats[i] = NaN if seen.check_uint64_conflict(): return values @@ -1962,7 +1963,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if val is None: seen.null_ = 1 floats[i] = complexes[i] = fnan - elif val is NAT: + elif val is NaT: if convert_datetime: idatetimes[i] = NPY_NAT seen.datetime_ = 1 diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 7c6cae4887f5f..1fdb04dd10d8e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -11,7 +11,8 @@ cnp.import_array() cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value -from tslibs.nattype cimport NAT, checknull_with_nat +from tslibs.nattype cimport checknull_with_nat +from tslibs.nattype import NaT cdef float64_t INF = np.inf cdef float64_t NEGINF = -INF @@ -25,7 +26,7 @@ cdef inline bint _check_all_nulls(object val): if isinstance(val, (float, complex)): res = val != val - elif val is NAT: + elif val is NaT: res = 1 elif val is None: res = 1 @@ -65,7 +66,7 @@ cpdef bint checknull(object val): return val != val # and val != INF and val != NEGINF elif util.is_datetime64_object(val): 
return get_datetime64_value(val) == NPY_NAT - elif val is NAT: + elif val is NaT: return True elif util.is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT @@ -104,7 +105,7 @@ cpdef bint checknull_old(object val): return val != val or val == INF or val == NEGINF elif util.is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT - elif val is NAT: + elif val is NaT: return True elif util.is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT @@ -188,7 +189,7 @@ def isnaobj_old(ndarray arr): result = np.zeros(n, dtype=np.uint8) for i in range(n): val = arr[i] - result[i] = val is NAT or _check_none_nan_inf_neginf(val) + result[i] = val is NaT or _check_none_nan_inf_neginf(val) return result.view(np.bool_) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8e27acc127d48..e346eb7e598ed 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -38,9 +38,9 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject, get_datetime64_nanos, tz_convert_utc_to_tzlocal) -# _many_ modules still look for NaT and iNaT here despite them not being needed +# many modules still look for NaT and iNaT here despite them not being needed from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821 -from tslibs.nattype cimport checknull_with_nat, NPY_NAT, NAT +from tslibs.nattype cimport checknull_with_nat, NPY_NAT from tslibs.offsets cimport to_offset @@ -129,7 +129,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NAT + result[i] = NaT else: dt64_to_dtstruct(value, &dts) result[i] = func_create(value, dts, tz, freq) @@ -137,7 +137,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NAT + result[i] = NaT else: # Python datetime objects do not support nanosecond # resolution (yet, PEP 564). 
Need to compute new value @@ -154,7 +154,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NAT + result[i] = NaT else: # Adjust datetime64 timestamp, recompute datetimestruct dt64_to_dtstruct(value + delta, &dts) @@ -166,7 +166,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NAT + result[i] = NaT else: # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 @@ -177,7 +177,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): for i in range(n): value = arr[i] if value == NPY_NAT: - result[i] = NAT + result[i] = NaT else: # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 @@ -434,11 +434,11 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): val = values[i] if checknull_with_nat(val): - oresult[i] = NAT + oresult[i] = NaT elif is_integer_object(val) or is_float_object(val): if val != val or val == NPY_NAT: - oresult[i] = NAT + oresult[i] = NaT else: try: oresult[i] = Timestamp(cast_from_unit(val, unit)) @@ -447,7 +447,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): elif is_string_object(val): if len(val) == 0 or val in nat_strings: - oresult[i] = NAT + oresult[i] = NaT else: oresult[i] = val @@ -747,10 +747,10 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if isinstance(val, float): oresult[i] = np.nan else: - oresult[i] = NAT + oresult[i] = NaT elif is_datetime64_object(val): if get_datetime64_value(val) == NPY_NAT: - oresult[i] = NAT + oresult[i] = NaT else: oresult[i] = val.item() else: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6120543b74816..f88671b41a16a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ 
b/pandas/_libs/tslibs/conversion.pyx @@ -35,8 +35,8 @@ from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, get_timezone, maybe_get_tz, tz_compare) from parsing import parse_datetime_string -from nattype import nat_strings -from nattype cimport NPY_NAT, checknull_with_nat, NAT +from nattype import nat_strings, NaT +from nattype cimport NPY_NAT, checknull_with_nat # ---------------------------------------------------------------------- # Constants @@ -277,7 +277,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, if is_string_object(ts): return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) - if ts is None or ts is NAT: + if ts is None or ts is NaT: obj.value = NPY_NAT elif is_datetime64_object(ts): if ts.view('i8') == NPY_NAT: @@ -425,7 +425,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, assert is_string_object(ts) if len(ts) == 0 or ts in nat_strings: - ts = NAT + ts = NaT elif ts == 'now': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns utc diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index dc3bb5220edd9..382ac9d323918 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -7,16 +7,3 @@ cdef bint _nat_scalar_rules[6] cdef bint checknull_with_nat(object val) cdef bint is_null_datetimelike(object val) - -from cpython.datetime cimport datetime - - -cdef class _NaT(datetime): - cdef readonly: - int64_t value - object freq - -# By declaring NAT in the .pxd file and cimporting it into other cython -# modules, we make `if thing is NaT` into a C pointer lookup and avoid a -# runtime lookup of NaT in the module namespace. 
-cdef _NaT NAT diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 4320e03a9e143..a010cbf76cf5d 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -47,7 +47,7 @@ def _make_nan_func(func_name, doc): def _make_nat_func(func_name, doc): def f(*args, **kwargs): - return NAT + return NaT f.__name__ = func_name f.__doc__ = doc return f @@ -67,10 +67,10 @@ def _make_error_func(func_name, cls): cdef _nat_divide_op(self, other): - if PyDelta_Check(other) or is_timedelta64_object(other) or other is NAT: + if PyDelta_Check(other) or is_timedelta64_object(other) or other is NaT: return np.nan if is_integer_object(other) or is_float_object(other): - return NAT + return NaT return NotImplemented @@ -82,16 +82,15 @@ cdef _nat_rdivide_op(self, other): def __nat_unpickle(*args): # return constant defined in the module - return NAT + return NaT # ---------------------------------------------------------------------- cdef class _NaT(datetime): - # Actual declarations are in the accompanying .pxd file - # cdef readonly: - # int64_t value - # object freq + cdef readonly: + int64_t value + object freq def __hash__(_NaT self): # py3k needs this defined here @@ -117,26 +116,26 @@ cdef class _NaT(datetime): def __add__(self, other): if PyDateTime_Check(other): - return NAT + return NaT elif hasattr(other, 'delta'): # Timedelta, offsets.Tick, offsets.Week - return NAT + return NaT elif getattr(other, '_typ', None) in ['dateoffset', 'series', 'period', 'datetimeindex', 'timedeltaindex']: # Duplicate logic in _Timestamp.__add__ to avoid needing # to subclass; allows us to @final(_Timestamp.__add__) return NotImplemented - return NAT + return NaT def __sub__(self, other): # Duplicate some logic from _Timestamp.__sub__ to avoid needing # to subclass; allows us to @final(_Timestamp.__sub__) if PyDateTime_Check(other): - return NAT + return NaT elif PyDelta_Check(other): - return NAT + return NaT elif getattr(other, '_typ', None) 
== 'datetimeindex': # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex @@ -155,13 +154,13 @@ cdef class _NaT(datetime): 'periodindex', 'dateoffset']: return NotImplemented - return NAT + return NaT def __pos__(self): - return NAT + return NaT def __neg__(self): - return NAT + return NaT def __div__(self, other): return _nat_divide_op(self, other) @@ -174,7 +173,7 @@ cdef class _NaT(datetime): def __mul__(self, other): if is_integer_object(other) or is_float_object(other): - return NAT + return NaT return NotImplemented @property @@ -272,7 +271,7 @@ class NaTType(_NaT): def __rmul__(self, other): if is_integer_object(other) or is_float_object(other): - return NAT + return NaT return NotImplemented # ---------------------------------------------------------------------- @@ -661,16 +660,13 @@ class NaTType(_NaT): NaT = NaTType() -# NAT is a C alias for NaT that can be checked for "if thing is NAT" without -# having to do a module-level lookup for NaT. -cdef _NaT NAT = NaT # ---------------------------------------------------------------------- cdef inline bint checknull_with_nat(object val): """ utility to check if a value is a nat or not """ - return val is None or util.is_nan(val) or val is NAT + return val is None or util.is_nan(val) or val is NaT cdef inline bint is_null_datetimelike(object val): @@ -687,7 +683,7 @@ cdef inline bint is_null_datetimelike(object val): """ if val is None or util.is_nan(val): return True - elif val is NAT: + elif val is NaT: return True elif util.is_timedelta64_object(val): return val.view('int64') == NPY_NAT diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 34bfaf0b31bce..ebcbea0ee30b3 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -46,8 +46,8 @@ from frequencies cimport (get_freq_code, get_base_alias, get_rule_month) from parsing import parse_time_string from resolution import Resolution -from nattype import nat_strings -from nattype cimport 
_nat_scalar_rules, NPY_NAT, is_null_datetimelike, NAT +from nattype import nat_strings, NaT +from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike from offsets cimport to_offset from offsets import _Tick @@ -1194,7 +1194,7 @@ def period_format(int64_t value, int freq, object fmt=None): int freq_group if value == NPY_NAT: - return repr(NAT) + return repr(NaT) if fmt is None: freq_group = get_freq_group(freq) @@ -1459,7 +1459,7 @@ def extract_ordinals(object[:] values, freq): except AttributeError: p = Period(p, freq=freq) - if p is NAT: + if p is NaT: # input may contain NaT-like string ordinals[i] = NPY_NAT else: @@ -1587,7 +1587,7 @@ cdef class _Period(object): Fast creation from an ordinal and freq that are already validated! """ if ordinal == NPY_NAT: - return NAT + return NaT else: freq = cls._maybe_convert_freq(freq) self = _Period.__new__(cls, ordinal, freq) @@ -1599,7 +1599,7 @@ cdef class _Period(object): msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) - elif other is NAT: + elif other is NaT: return _nat_scalar_rules[op] # index/series like elif hasattr(other, '_typ'): @@ -1647,8 +1647,8 @@ cdef class _Period(object): if (PyDelta_Check(other) or util.is_timedelta64_object(other) or util.is_offset_object(other)): return self._add_delta(other) - elif other is NAT: - return NAT + elif other is NaT: + return NaT elif util.is_integer_object(other): maybe_integer_op_deprecated(self) @@ -1695,8 +1695,8 @@ cdef class _Period(object): else: # pragma: no cover return NotImplemented elif is_period_object(other): - if self is NAT: - return NAT + if self is NaT: + return NaT return NotImplemented else: return NotImplemented @@ -2458,7 +2458,7 @@ class Period(_Period): value = str(value) value = value.upper() dt, _, reso = parse_time_string(value, freq) - if dt is NAT: + if dt is NaT: ordinal = NPY_NAT if freq is None: diff --git 
a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 13f4dc7cadf48..c09a8e5b395ee 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -31,8 +31,8 @@ from util cimport (is_timedelta64_object, is_datetime64_object, from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) -from nattype import nat_strings -from nattype cimport checknull_with_nat, NPY_NAT, NAT +from nattype import nat_strings, NaT +from nattype cimport checknull_with_nat, NPY_NAT from offsets cimport to_offset # ---------------------------------------------------------------------- @@ -119,7 +119,7 @@ def ints_to_pytimedelta(int64_t[:] arr, box=False): value = arr[i] if value == NPY_NAT: - result[i] = NAT + result[i] = NaT else: if box: result[i] = Timedelta(value) @@ -568,8 +568,8 @@ def _binary_op_method_timedeltalike(op, name): return op(self, other.delta) return NotImplemented - elif other is NAT: - return NAT + elif other is NaT: + return NaT elif is_timedelta64_object(other): # convert to Timedelta below; avoid catching this in @@ -603,9 +603,9 @@ def _binary_op_method_timedeltalike(op, name): # failed to parse as timedelta return NotImplemented - if other is NAT: + if other is NaT: # e.g. if original other was timedelta64('NaT') - return NAT + return NaT return Timedelta(op(self.value, other.value), unit='ns') f.__name__ = name @@ -1171,7 +1171,7 @@ class Timedelta(_Timedelta): unit = parse_timedelta_unit(unit) value = convert_to_timedelta64(value, unit) elif checknull_with_nat(value): - return NAT + return NaT else: raise ValueError( "Value must be Timedelta, string, integer, " @@ -1182,7 +1182,7 @@ class Timedelta(_Timedelta): # nat if value == NPY_NAT: - return NAT + return NaT # make timedelta happy td_base = _Timedelta.__new__(cls, microseconds=int(value) / 1000) @@ -1270,13 +1270,13 @@ class Timedelta(_Timedelta): # i.e. 
np.nan, but also catch np.float64("NaN") which would # otherwise get caught by the hasattr(other, "dtype") branch # incorrectly return a np.timedelta64 object. - return NAT + return NaT elif hasattr(other, 'dtype'): # ndarray-like return other * self.to_timedelta64() - elif other is NAT: + elif other is NaT: raise TypeError('Cannot multiply Timedelta with NaT') elif not (is_integer_object(other) or is_float_object(other)): @@ -1303,7 +1303,7 @@ class Timedelta(_Timedelta): # i.e. np.nan, but also catch np.float64("NaN") which would # otherwise get caught by the hasattr(other, "dtype") branch # incorrectly return a np.timedelta64 object. - return NAT + return NaT elif hasattr(other, 'dtype'): return self.to_timedelta64() / other @@ -1316,7 +1316,7 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NAT: + if other is NaT: return np.nan return self.value / float(other.value) @@ -1339,8 +1339,8 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NAT: - return NAT + if other is NaT: + return NaT return float(other.value) / self.value if not PY3: @@ -1382,7 +1382,7 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NAT: + if other is NaT: return np.nan return self.value // other.value @@ -1428,7 +1428,7 @@ class Timedelta(_Timedelta): return NotImplemented other = Timedelta(other) - if other is NAT: + if other is NaT: return np.nan return other.value // self.value diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 61eb002465db4..72a7294f10c26 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -25,7 +25,8 @@ from conversion import tz_localize_to_utc, normalize_i8_timestamps from conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) from fields import get_start_end_field, get_date_name_field -from nattype cimport NPY_NAT, 
NAT +from nattype import NaT +from nattype cimport NPY_NAT from np_datetime import OutOfBoundsDatetime from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct) @@ -204,7 +205,7 @@ cdef class _Timestamp(datetime): if isinstance(other, _Timestamp): ots = other - elif other is NAT: + elif other is NaT: return op == Py_NE elif PyDateTime_Check(other): if self.nanosecond == 0: @@ -339,9 +340,9 @@ cdef class _Timestamp(datetime): elif is_integer_object(other): maybe_integer_op_deprecated(self) - if self is NAT: + if self is NaT: # to be compat with Period - return NAT + return NaT elif self.freq is None: raise ValueError("Cannot add integral value to Timestamp " "without freq.") @@ -383,8 +384,8 @@ cdef class _Timestamp(datetime): elif getattr(other, '_typ', None) == 'timedeltaindex': return (-other).__add__(self) - elif other is NAT: - return NAT + elif other is NaT: + return NaT # coerce if necessary if we are a Timestamp-like if (PyDateTime_Check(self) @@ -730,7 +731,7 @@ class Timestamp(_Timestamp): ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) if ts.value == NPY_NAT: - return NAT + return NaT if is_string_object(freq): freq = to_offset(freq) From d46d516b8d85d03cc6089906a7bd55cdb34d795b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Nov 2018 08:03:50 -0800 Subject: [PATCH 07/13] standardize iNaT-->NPY_NAT --- pandas/_libs/algos.pyx | 10 +++++----- pandas/_libs/algos_rank_helper.pxi.in | 6 +++--- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/groupby_helper.pxi.in | 14 +++++++------- pandas/_libs/hashtable.pyx | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 8567b7895dba3..ce79aec9a4392 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -33,7 +33,7 @@ cdef float64_t FP_ERR = 1e-13 cdef float64_t NaN = np.NaN -cdef int64_t iNaT = get_nat() +cdef 
int64_t NPY_NAT = get_nat() tiebreakers = { 'average': TIEBREAK_AVERAGE, @@ -810,7 +810,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (timelike and arr[0] == iNaT): + if arr[0] != arr[0] or (timelike and arr[0] == NPY_NAT): # single value is NaN return False, False, True else: @@ -818,7 +818,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): elif n < 2: return True, True, True - if timelike and arr[0] == iNaT: + if timelike and arr[0] == NPY_NAT: return False, False, True if algos_t is not object: @@ -826,7 +826,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break @@ -851,7 +851,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 51b0c2966e361..4d144dcf2808a 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -74,9 +74,9 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT - # create copy in case of iNaT + # create copy in case of NPY_NAT # values are mutated inplace if mask.any(): values = values.copy() @@ -257,7 +257,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT {{endif}} np.putmask(values, mask, nan_value) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 
4299459913e7e..8b93ec04c2aac 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -19,7 +19,7 @@ from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN, TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE) from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers -cdef int64_t iNaT = get_nat() +cdef int64_t NPY_NAT = get_nat() cdef float64_t NaN = np.NaN diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 4aa69ce18905c..380f4d4bbc45c 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -277,7 +277,7 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, # name, c_type, nan_val dtypes = [('float64', 'float64_t', 'NAN'), ('float32', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'iNaT'), + ('int64', 'int64_t', 'NPY_NAT'), ('object', 'object', 'NAN')] def get_dispatch(dtypes): @@ -630,7 +630,7 @@ def group_max(ndarray[groupby_t, ndim=2] out, if groupby_t is int64_t: # Note: evaluated at compile-time maxx[:] = -_int64_max - nan_val = iNaT + nan_val = NPY_NAT else: maxx[:] = -np.inf nan_val = NAN @@ -692,7 +692,7 @@ def group_min(ndarray[groupby_t, ndim=2] out, minx = np.empty_like(out) if groupby_t is int64_t: minx[:] = _int64_max - nan_val = iNaT + nan_val = NPY_NAT else: minx[:] = np.inf nan_val = NAN @@ -762,8 +762,8 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, # val = nan if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val < mval: @@ -809,8 +809,8 @@ def group_cummax(ndarray[groupby_t, ndim=2] out, val = values[i, j] if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val > mval: diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index dea8c08759457..9aa887727a765 
100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -42,7 +42,7 @@ cimport util from missing cimport checknull -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() _SIZE_HINT_LIMIT = (1 << 20) + 7 diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index c90664e9e4628..a71023ed34f44 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -253,7 +253,7 @@ cdef class HashTable: # name, dtype, float_group, default_na_value dtypes = [('Float64', 'float64', True, 'np.nan'), ('UInt64', 'uint64', False, 0), - ('Int64', 'int64', False, 'iNaT')] + ('Int64', 'int64', False, 'NPY_NAT')] }} From f2f4b8dda74f20a47b42808e8521d7d9099e1235 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Nov 2018 08:12:51 -0800 Subject: [PATCH 08/13] comment cleanup --- pandas/_libs/algos_common_helper.pxi.in | 4 ++-- pandas/_libs/algos_take_helper.pxi.in | 4 ++-- pandas/_libs/groupby_helper.pxi.in | 4 ++-- pandas/_libs/index_class_helper.pxi.in | 4 ++-- pandas/_libs/sparse_op_helper.pxi.in | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index c2b0a4119e6e5..3708deb1a4b76 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -84,9 +84,9 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # ensure_dtype -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef int PLATFORM_INT = (np.arange(0, dtype=np.intp)).descr.type_num diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in 
index bd5feef1ff2b0..2fea8b17fd9d7 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for take WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # take_1d, take_2d -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 380f4d4bbc45c..523d43f893aad 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -268,9 +268,9 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group_nth, group_last, group_rank -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index c19812efaaa35..ff95917f6643a 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -4,9 +4,9 @@ Template for functions of IndexEngine subclasses. 
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IndexEngine Subclass Methods -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index d02a985de1d61..1f41096a3f194 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for sparse ops WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Sparse op -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- ctypedef fused sparse_t: float64_t From 03ae9cfb5e793cd2a7d1ec18514dd5750556b1aa Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Nov 2018 09:37:10 -0800 Subject: [PATCH 09/13] remove unnecessary cpdef --- pandas/_libs/algos.pxd | 3 --- pandas/_libs/algos.pyx | 3 ++- pandas/_libs/groupby.pyx | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index 0888cf3c85f2f..5df1e381ea3ce 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -1,9 +1,6 @@ from util cimport numeric -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil - - cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: cdef: numeric t diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index ce79aec9a4392..d2d5b6fd3cd3d 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -195,9 +195,10 @@ def groupsort_indexer(ndarray[int64_t]
index, Py_ssize_t ngroups): return result, counts +# TODO: redundant with groupby.kth_smallest_c @cython.boundscheck(False) @cython.wraparound(False) -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil: +def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: cdef: Py_ssize_t i, j, l, m, n = a.shape[0] numeric x diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 8b93ec04c2aac..7c16b29f3e42b 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -65,7 +65,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result -# TODO: Is this redundant with algos.kth_smallest? +# TODO: Is this redundant with algos.kth_smallest cdef inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n) nogil: From 33fc12da8c9b58627390d0351418611f9b405ab7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Nov 2018 13:55:55 -0800 Subject: [PATCH 10/13] delete unused or commented-out --- pandas/_libs/sparse.pyx | 65 ----------------------------------------- 1 file changed, 65 deletions(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index b8ca744ac88c4..bfd7cedded842 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -673,13 +673,6 @@ cdef class BlockMerge(object): self.yi = xi -cdef class BlockIntersection(BlockMerge): - """ - not done yet - """ - pass - - cdef class BlockUnion(BlockMerge): """ Object-oriented approach makes sharing state between recursive functions a @@ -804,64 +797,6 @@ cdef class BlockUnion(BlockMerge): include "sparse_op_helper.pxi" - -# ----------------------------------------------------------------------------- -# Indexing operations - -def get_reindexer(ndarray[object, ndim=1] values, dict index_map): - cdef: - object idx - Py_ssize_t i - Py_ssize_t new_length = len(values) - ndarray[int32_t, ndim=1] indexer - - indexer = np.empty(new_length, dtype=np.int32) - - for i in range(new_length): - idx = values[i] - if idx in index_map: - 
indexer[i] = index_map[idx] - else: - indexer[i] = -1 - - return indexer - -# def reindex_block(ndarray[float64_t, ndim=1] values, -# BlockIndex sparse_index, -# ndarray[int32_t, ndim=1] indexer): -# cdef: -# Py_ssize_t i, length -# ndarray[float64_t, ndim=1] out - -# out = np.empty(length, dtype=np.float64) - -# for i in range(length): -# if indexer[i] == -1: -# pass - - -# cdef class SparseCruncher(object): -# """ -# Class to acquire float pointer for convenient operations on sparse data -# structures -# """ -# cdef: -# SparseIndex index -# float64_t* buf - -# def __init__(self, ndarray[float64_t, ndim=1, mode='c'] values, -# SparseIndex index): - -# self.index = index -# self.buf = values.data - - -def reindex_integer(ndarray[float64_t, ndim=1] values, - IntIndex sparse_index, - ndarray[int32_t, ndim=1] indexer): - pass - - # ----------------------------------------------------------------------------- # SparseArray mask create operations From 8bc71b8617f811bd7fc9dfa75cba58773415abc1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Nov 2018 13:57:14 -0800 Subject: [PATCH 11/13] remove unused --- pandas/_libs/sparse.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index bfd7cedded842..2334f735f817d 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -22,9 +22,6 @@ _np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') cdef float64_t NaN = np.NaN cdef float64_t INF = np.inf -cdef inline int int_max(int a, int b): return a if a >= b else b -cdef inline int int_min(int a, int b): return a if a <= b else b - # ----------------------------------------------------------------------------- From 5e50897ccbf3b0f7e40568cfffa08621b34bc52d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Nov 2018 15:53:57 -0800 Subject: [PATCH 12/13] whitespace fixup --- pandas/_libs/sparse.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/sparse.pyx 
b/pandas/_libs/sparse.pyx index 2334f735f817d..668bd0ae6bbb7 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -794,6 +794,7 @@ cdef class BlockUnion(BlockMerge): include "sparse_op_helper.pxi" + # ----------------------------------------------------------------------------- # SparseArray mask create operations From 23344b2823bf3c45f5ba88af50b1d8308d7d4f64 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 10 Nov 2018 08:36:00 -0800 Subject: [PATCH 13/13] remove unliked comment --- pandas/_libs/algos.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index d2d5b6fd3cd3d..e77899507833f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -195,7 +195,6 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): return result, counts -# TODO: redundant with groupby.kth_smallest_c @cython.boundscheck(False) @cython.wraparound(False) def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: