From 17cb706134a723e210c306b7eeadda28078af44d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 13:21:59 -0700 Subject: [PATCH 01/16] fix extern --- pandas/_libs/tslibs/np_datetime.pxd | 2 +- pandas/_libs/tslibs/np_datetime.pyx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index a585259286a58..c3d229d4e5006 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -46,7 +46,7 @@ cdef extern from "numpy/ndarraytypes.h": NPY_FR_fs NPY_FR_as -cdef extern from "../src/datetime/np_datetime.h": +cdef extern from "src/datetime/np_datetime.h": ctypedef struct pandas_timedeltastruct: int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 76838c7a23b24..a0099837e876a 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -15,7 +15,7 @@ PyDateTime_IMPORT from numpy cimport int64_t -cdef extern from "../src/datetime/np_datetime.h": +cdef extern from "src/datetime/np_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) @@ -33,7 +33,7 @@ cdef extern from "../src/datetime/np_datetime.h": npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS -cdef extern from "../src/datetime/np_datetime_strings.h": +cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(char *str, int len, npy_datetimestruct *out, int *out_local, int *out_tzoffset) From a09c9c4fae9cbc4c4262f9681af9c76d956e781f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 13:40:35 -0700 Subject: [PATCH 02/16] move stdint extern out of tslibs.util --- pandas/_libs/tslibs/period.pyx | 6 ++++-- pandas/_libs/tslibs/strptime.pyx | 10 +++++----- pandas/_libs/tslibs/timedeltas.pxd | 2 -- pandas/_libs/tslibs/timestamps.pyx | 5 ++--- pandas/_libs/tslibs/util.pxd | 
15 --------------- pandas/_libs/util.pxd | 15 +++++++++++++++ 6 files changed, 26 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 65b37759ce9ce..fb5a45d3b5b91 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -26,12 +26,12 @@ from np_datetime cimport (npy_datetimestruct, dtstruct_to_dt64, pandas_datetime_to_datetimestruct, NPY_DATETIMEUNIT, NPY_FR_D) -cdef extern from "../src/datetime/np_datetime.h": +cdef extern from "src/datetime/np_datetime.h": int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d) nogil cimport util -from util cimport is_period_object, is_string_object, INT32_MIN +from util cimport is_period_object, is_string_object from timestamps import Timestamp from timezones cimport is_utc, is_tzlocal, get_dst_info @@ -53,6 +53,8 @@ from offsets cimport to_offset from offsets import _Tick cdef bint PY2 = str == bytes +cdef enum: + INT32_MIN = -2147483648 ctypedef struct asfreq_info: diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 59d673881bb40..d472320cfdb12 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -# cython: profile=False """Strptime-related classes and functions. 
""" import time import locale import calendar import re +from datetime import date as datetime_date # Python 2 vs Python 3 @@ -20,15 +20,14 @@ except: except: from _dummy_thread import allocate_lock as _thread_allocate_lock -import pytz - from cython cimport Py_ssize_t -from cpython cimport PyFloat_Check + + +import pytz import numpy as np from numpy cimport int64_t -from datetime import date as datetime_date from np_datetime cimport (check_dts_bounds, dtstruct_to_dt64, npy_datetimestruct) @@ -622,6 +621,7 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, days_to_week = week_0_length + (7 * (week_of_year - 1)) return 1 + days_to_week + day_of_week + cdef parse_timezone_directive(object z): """ Parse the '%z' directive and return a pytz.FixedOffset diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 2413c281e0a52..ef9fd3207e5f0 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False -from cpython.datetime cimport timedelta - from numpy cimport int64_t # Exposed for tslib, not intended for outside use. 
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index eb5c0076a868a..67420fda8aa51 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -17,8 +17,7 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT from util cimport (is_datetime64_object, is_timedelta64_object, - is_integer_object, is_string_object, is_array, - INT64_MAX) + is_integer_object, is_string_object, is_array) cimport ccalendar from conversion import tz_localize_to_utc, normalize_i8_timestamps @@ -1103,7 +1102,7 @@ class Timestamp(_Timestamp): # Add the min and max fields at the class level -cdef int64_t _NS_UPPER_BOUND = INT64_MAX +cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # the smallest value we could actually represent is # INT64_MIN + 1 == -9223372036854775807 # but to allow overflow free conversion with a microsecond resolution diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 0470202ee7d98..ada6cc6283401 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -43,21 +43,6 @@ cdef extern from "numpy/npy_common.h": int64_t NPY_MIN_INT64 -cdef extern from "../src/headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - - cdef inline int64_t get_nat(): return NPY_MIN_INT64 diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 134f34330d8aa..31843a755e7b1 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -14,6 +14,21 @@ cdef extern from "src/numpy_helper.h": const char *get_c_string(object) except NULL +cdef extern from "src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: 
INT64_MIN + + ctypedef fused numeric: cnp.int8_t cnp.int16_t From 99341c6ff796c664d9c531e61bd2dbc0bf856756 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 13:50:30 -0700 Subject: [PATCH 03/16] cleanup imports --- pandas/_libs/tslibs/resolution.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 18cc21ccd59e0..bd62013cb2063 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False cimport cython from cython cimport Py_ssize_t @@ -11,8 +10,7 @@ from util cimport is_string_object, get_nat from np_datetime cimport npy_datetimestruct, dt64_to_dtstruct from frequencies cimport get_freq_code -from timezones cimport (is_utc, is_tzlocal, - maybe_get_tz, get_dst_info) +from timezones cimport is_utc, is_tzlocal, maybe_get_tz, get_dst_info from conversion cimport tz_convert_utc_to_tzlocal from ccalendar cimport get_days_in_month From f22bbe9710f419402bc86e246dcb10bc91e4eb4f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 13:51:54 -0700 Subject: [PATCH 04/16] de-duplicate helper.h --- pandas/_libs/src/klib/khash.h | 13 +------------ pandas/_libs/src/parse_helper.h | 1 + pandas/_libs/src/parser/tokenizer.c | 6 +++--- pandas/_libs/src/parser/tokenizer.h | 12 +----------- pandas/_libs/src/skiplist.h | 13 +------------ 5 files changed, 7 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index 869607a44c001..f034c1d619216 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -112,6 +112,7 @@ int main() { #include #include #include +#include "../helper.h" #if UINT_MAX == 0xffffffffu @@ -130,18 +131,6 @@ typedef signed long long khint64_t; typedef double khfloat64_t; -#ifndef PANDAS_INLINE - #if defined(__GNUC__) - #define PANDAS_INLINE static __inline__ - #elif 
defined(_MSC_VER) - #define PANDAS_INLINE static __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define PANDAS_INLINE static inline - #else - #define PANDAS_INLINE - #endif -#endif - typedef khint32_t khint_t; typedef khint_t khiter_t; diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index 6dd8b66eab33d..d17d9166ea3ee 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -12,6 +12,7 @@ The full license is in the LICENSE file, distributed with this software. #include #include +#include "helper.h" #include "headers/portable.h" static double xstrtod(const char *p, char **q, char decimal, char sci, diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index a18d12616a802..da0a9f7498aa8 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -363,7 +363,7 @@ static int push_char(parser_t *self, char c) { return 0; } -int P_INLINE end_field(parser_t *self) { +int PANDAS_INLINE end_field(parser_t *self) { // XXX cruft if (self->words_len >= self->words_cap) { TRACE( @@ -1381,11 +1381,11 @@ int tokenize_all_rows(parser_t *self) { return status; } -P_INLINE void uppercase(char *p) { +PANDAS_INLINE void uppercase(char *p) { for (; *p; ++p) *p = toupper(*p); } -int P_INLINE to_longlong(char *item, long long *p_value) { +int PANDAS_INLINE to_longlong(char *item, long long *p_value) { char *p_end; // Try integer conversion. We explicitly give the base to be 10. 
If diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 63baf91e3c136..f52ec81234423 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -27,6 +27,7 @@ See LICENSE for the license #define ERROR_INVALID_CHARS 3 #include "../headers/stdint.h" +#include "../helper.h" #include "khash.h" @@ -38,17 +39,6 @@ See LICENSE for the license #define REACHED_EOF 1 #define CALLING_READ_FAILED 2 -#ifndef P_INLINE -#if defined(__GNUC__) -#define P_INLINE static __inline__ -#elif defined(_MSC_VER) -#define P_INLINE -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -#define P_INLINE static inline -#else -#define P_INLINE -#endif -#endif #if defined(_MSC_VER) #define strtoll _strtoi64 diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h index f9527e72f577e..6d15f291ceb8b 100644 --- a/pandas/_libs/src/skiplist.h +++ b/pandas/_libs/src/skiplist.h @@ -20,18 +20,7 @@ Python recipe (http://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) #include #include #include - -#ifndef PANDAS_INLINE -#if defined(__GNUC__) -#define PANDAS_INLINE static __inline__ -#elif defined(_MSC_VER) -#define PANDAS_INLINE static __inline -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -#define PANDAS_INLINE static inline -#else -#define PANDAS_INLINE -#endif -#endif +#include "helper.h" PANDAS_INLINE float __skiplist_nanf(void) { const union { From 0c37a811af3f9b4eb5d031d09c6796fd044d9409 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 13:59:20 -0700 Subject: [PATCH 05/16] Clean up formatting, ndarray-->memoryview --- pandas/_libs/ops.pyx | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index ec9d8304f9243..54052e290dc6c 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False import operator from cpython 
cimport (PyFloat_Check, PyBool_Check, @@ -21,7 +20,7 @@ from missing cimport checknull @cython.wraparound(False) @cython.boundscheck(False) -def scalar_compare(ndarray[object] values, object val, object op): +def scalar_compare(object[:] values, object val, object op): """ Compare each element of `values` array with the scalar `val`, with the comparison operation described by `op`. @@ -73,7 +72,7 @@ def scalar_compare(ndarray[object] values, object val, object op): else: try: result[i] = PyObject_RichCompareBool(x, val, flag) - except (TypeError): + except TypeError: result[i] = True elif flag == Py_EQ: for i in range(n): @@ -85,7 +84,7 @@ def scalar_compare(ndarray[object] values, object val, object op): else: try: result[i] = PyObject_RichCompareBool(x, val, flag) - except (TypeError): + except TypeError: result[i] = False else: @@ -103,7 +102,7 @@ def scalar_compare(ndarray[object] values, object val, object op): @cython.wraparound(False) @cython.boundscheck(False) -def vec_compare(ndarray[object] left, ndarray[object] right, object op): +def vec_compare(object[:] left, object[:] right, object op): """ Compare the elements of `left` with the elements of `right` pointwise, with the comparison operation described by `op`. @@ -126,8 +125,8 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op): int flag if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) + raise ValueError('Arrays were different lengths: {n} vs {nright}' + .format(n=n, nright=len(right))) if op is operator.lt: flag = Py_LT @@ -170,7 +169,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op): @cython.wraparound(False) @cython.boundscheck(False) -def scalar_binop(ndarray[object] values, object val, object op): +def scalar_binop(object[:] values, object val, object op): """ Apply the given binary operator `op` between each element of the array `values` and the scalar `val`. 
@@ -207,7 +206,7 @@ def scalar_binop(ndarray[object] values, object val, object op): @cython.wraparound(False) @cython.boundscheck(False) -def vec_binop(ndarray[object] left, ndarray[object] right, object op): +def vec_binop(object[:] left, object[:] right, object op): """ Apply the given binary operator `op` pointwise to the elements of arrays `left` and `right`. @@ -224,11 +223,11 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op): """ cdef: Py_ssize_t i, n = len(left) - ndarray[object] result + object[:] result if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) + raise ValueError('Arrays were different lengths: {n} vs {nright}' + .format(n=n, nright=len(right))) result = np.empty(n, dtype=object) @@ -245,7 +244,7 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op): else: raise - return maybe_convert_bool(result) + return maybe_convert_bool(result.base) # `.base` to access np.ndarray def maybe_convert_bool(ndarray[object] arr, @@ -270,7 +269,7 @@ def maybe_convert_bool(ndarray[object] arr, if false_values is not None: false_vals = false_vals | set(false_values) - for i from 0 <= i < n: + for i in range(n): val = arr[i] if PyBool_Check(val): From 2aa9a29c349796d01de7703ea425c5ea63a31b21 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 14:12:58 -0700 Subject: [PATCH 06/16] string formatting --- pandas/_libs/tslibs/period.pyx | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index fb5a45d3b5b91..83c3a4c10ce5b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1252,7 +1252,7 @@ def period_format(int64_t value, int freq, object fmt=None): elif freq_group == 12000: # NANOSEC fmt = b'%Y-%m-%d %H:%M:%S.%n' else: - raise ValueError('Unknown freq: %d' % freq) + raise ValueError('Unknown freq: {freq}'.format(freq=freq)) return 
_period_strftime(value, freq, fmt) @@ -1417,7 +1417,7 @@ def get_period_field_arr(int code, int64_t[:] arr, int freq): func = _get_accessor_func(code) if func is NULL: - raise ValueError('Unrecognized period code: %d' % code) + raise ValueError('Unrecognized period code: {code}'.format(code=code)) sz = len(arr) out = np.empty(sz, dtype=np.int64) @@ -1599,7 +1599,8 @@ cdef class _Period(object): if freq.n <= 0: raise ValueError('Frequency must be positive, because it' - ' represents span: {0}'.format(freq.freqstr)) + ' represents span: {freqstr}' + .format(freqstr=freq.freqstr)) return freq @@ -1631,8 +1632,9 @@ cdef class _Period(object): return NotImplemented elif op == Py_NE: return NotImplemented - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) + raise TypeError('Cannot compare type {cls} with type {typ}' + .format(cls=type(self).__name__, + typ=type(other).__name__)) def __hash__(self): return hash((self.ordinal, self.freqstr)) @@ -2430,8 +2432,8 @@ class Period(_Period): freq = cls._maybe_convert_freq(freq) if ordinal is not None and value is not None: - raise ValueError(("Only value or ordinal but not both should be " - "given but not both")) + raise ValueError("Only value or ordinal but not both should be " + "given but not both") elif ordinal is not None: if not util.is_integer_object(ordinal): raise ValueError("Ordinal must be an integer") @@ -2483,7 +2485,8 @@ class Period(_Period): freq = Resolution.get_freq(reso) except KeyError: raise ValueError( - "Invalid frequency or could not infer: %s" % reso) + "Invalid frequency or could not infer: {reso}" + .format(reso=reso)) elif isinstance(value, datetime): dt = value From 15595e5ebad8151d5b2901e1699bc4af50bed90a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 14:25:02 -0700 Subject: [PATCH 07/16] move leftover period_helper code out of extern block --- pandas/_libs/ops.pyx | 8 +-- pandas/_libs/tslibs/period.pyx | 106 
++++++++++++--------------------- 2 files changed, 42 insertions(+), 72 deletions(-) diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 54052e290dc6c..a194f1588e231 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -186,13 +186,13 @@ def scalar_binop(object[:] values, object val, object op): """ cdef: Py_ssize_t i, n = len(values) - ndarray[object] result + object[:] result object x result = np.empty(n, dtype=object) if val is None or is_nan(val): - result.fill(val) - return result + result[:] = val + return result.base # `.base` to access underlying np.ndarray for i in range(n): x = values[i] @@ -201,7 +201,7 @@ def scalar_binop(object[:] values, object val, object op): else: result[i] = op(x, val) - return maybe_convert_bool(result) + return maybe_convert_bool(result.base) @cython.wraparound(False) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 83c3a4c10ce5b..a99b1a5e1b4ef 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -69,58 +69,21 @@ ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil cdef extern from *: """ /*** FREQUENCY CONSTANTS ***/ + // See frequencies.pyx for more detailed variants #define FR_ANN 1000 /* Annual */ - #define FR_ANNDEC FR_ANN /* Annual - December year end*/ - #define FR_ANNJAN 1001 /* Annual - January year end*/ - #define FR_ANNFEB 1002 /* Annual - February year end*/ - #define FR_ANNMAR 1003 /* Annual - March year end*/ - #define FR_ANNAPR 1004 /* Annual - April year end*/ - #define FR_ANNMAY 1005 /* Annual - May year end*/ - #define FR_ANNJUN 1006 /* Annual - June year end*/ - #define FR_ANNJUL 1007 /* Annual - July year end*/ - #define FR_ANNAUG 1008 /* Annual - August year end*/ - #define FR_ANNSEP 1009 /* Annual - September year end*/ - #define FR_ANNOCT 1010 /* Annual - October year end*/ - #define FR_ANNNOV 1011 /* Annual - November year end*/ - - /* The standard quarterly frequencies with various fiscal year ends - eg, 
Q42005 for Q@OCT runs Aug 1, 2005 to Oct 31, 2005 */ #define FR_QTR 2000 /* Quarterly - December year end (default Q) */ - #define FR_QTRDEC FR_QTR /* Quarterly - December year end */ - #define FR_QTRJAN 2001 /* Quarterly - January year end */ - #define FR_QTRFEB 2002 /* Quarterly - February year end */ - #define FR_QTRMAR 2003 /* Quarterly - March year end */ - #define FR_QTRAPR 2004 /* Quarterly - April year end */ - #define FR_QTRMAY 2005 /* Quarterly - May year end */ - #define FR_QTRJUN 2006 /* Quarterly - June year end */ - #define FR_QTRJUL 2007 /* Quarterly - July year end */ - #define FR_QTRAUG 2008 /* Quarterly - August year end */ - #define FR_QTRSEP 2009 /* Quarterly - September year end */ - #define FR_QTROCT 2010 /* Quarterly - October year end */ - #define FR_QTRNOV 2011 /* Quarterly - November year end */ - - #define FR_MTH 3000 /* Monthly */ - - #define FR_WK 4000 /* Weekly */ - #define FR_WKSUN FR_WK /* Weekly - Sunday end of week */ - #define FR_WKMON 4001 /* Weekly - Monday end of week */ - #define FR_WKTUE 4002 /* Weekly - Tuesday end of week */ - #define FR_WKWED 4003 /* Weekly - Wednesday end of week */ - #define FR_WKTHU 4004 /* Weekly - Thursday end of week */ - #define FR_WKFRI 4005 /* Weekly - Friday end of week */ - #define FR_WKSAT 4006 /* Weekly - Saturday end of week */ - - #define FR_BUS 5000 /* Business days */ - #define FR_DAY 6000 /* Daily */ - #define FR_HR 7000 /* Hourly */ - #define FR_MIN 8000 /* Minutely */ - #define FR_SEC 9000 /* Secondly */ - #define FR_MS 10000 /* Millisecondly */ - #define FR_US 11000 /* Microsecondly */ - #define FR_NS 12000 /* Nanosecondly */ - - #define FR_UND -10000 /* Undefined */ + #define FR_MTH 3000 /* Monthly */ + #define FR_WK 4000 /* Weekly */ + #define FR_BUS 5000 /* Business days */ + #define FR_DAY 6000 /* Daily */ + #define FR_HR 7000 /* Hourly */ + #define FR_MIN 8000 /* Minutely */ + #define FR_SEC 9000 /* Secondly */ + #define FR_MS 10000 /* Millisecondly */ + #define FR_US 11000 /* 
Microsecondly */ + #define FR_NS 12000 /* Nanosecondly */ + #define FR_UND -10000 /* Undefined */ static int64_t daytime_conversion_factor_matrix[7][7] = { {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, @@ -130,26 +93,8 @@ cdef extern from *: {0, 0, 0, 0, 1, 1000, 1000000}, {0, 0, 0, 0, 0, 1, 1000}, {0, 0, 0, 0, 0, 0, 1}}; - - int max_value(int a, int b) { return a > b ? a : b; } - - static int min_value(int a, int b) { return a < b ? a : b; } - - npy_int64 get_daytime_conversion_factor(int from_index, int to_index) { - int row = min_value(from_index, to_index); - int col = max_value(from_index, to_index); - // row or col < 6 means frequency strictly lower than Daily, which - // do not use daytime_conversion_factors - if (row < 6) { - return 0; - } else if (col < 6) { - return 0; - } - return daytime_conversion_factor_matrix[row - 6][col - 6]; - } """ - int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil - int max_value(int left, int right) nogil + int64_t daytime_conversion_factor_matrix[7][7] int FR_ANN int FR_QTR int FR_MTH @@ -165,6 +110,31 @@ cdef extern from *: int FR_UND +cdef int max_value(int left, int right) nogil: + if left > right: + return left + return right + + +cdef int min_value(int left, int right) nogil: + if left < right: + return left + return right + + +cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil: + cdef: + int row = min_value(from_index, to_index) + int col = max_value(from_index, to_index) + # row or col < 6 means frequency strictly lower than Daily, which + # do not use daytime_conversion_factors + if row < 6: + return 0 + elif col < 6: + return 0 + return daytime_conversion_factor_matrix[row - 6][col - 6] + + cdef int64_t nofunc(int64_t ordinal, asfreq_info *af_info): return np.iinfo(np.int32).min From 74e7d19fd6215975835bcfb4b2b9fab107f5b0a5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 14:49:08 -0700 Subject: [PATCH 08/16] modernize for loop 
notation, assorted cleanups --- pandas/_libs/algos_common_helper.pxi.in | 2 +- pandas/_libs/algos_take_helper.pxi.in | 22 +++++----- pandas/_libs/hashtable_func_helper.pxi.in | 8 ++-- pandas/_libs/lib.pyx | 43 ++++++++++--------- pandas/_libs/sparse.pyx | 25 +++++------ pandas/_libs/sparse_op_helper.pxi.in | 4 +- pandas/_libs/tslibs/frequencies.pyx | 1 - pandas/_libs/tslibs/offsets.pyx | 12 ------ pandas/_libs/tslibs/period.pyx | 1 + .../_libs/tslibs/src/datetime/np_datetime.c | 12 ------ .../_libs/tslibs/src/datetime/np_datetime.h | 2 - pandas/tests/tseries/offsets/test_fiscal.py | 12 +++++- pandas/tests/tseries/offsets/test_offsets.py | 13 +++++- 13 files changed, 77 insertions(+), 80 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 0d3f6664da9e3..97b7196da80bb 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -523,7 +523,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, Py_ssize_t i, j, k k = len(values) - for j from 0 <= j < k: + for j in range(k): i = indexer[j] out[i] = values[j, loc] diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 71bb1bb4fe9be..0e69324acd341 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -46,7 +46,7 @@ def get_dispatch(dtypes): fv = fill_value %(nogil_str)s - %(tab)sfor i from 0 <= i < n: + %(tab)sfor i in range(n): %(tab)s idx = indexer[i] %(tab)s if idx == -1: %(tab)s out[i] = fv @@ -74,10 +74,10 @@ def get_dispatch(dtypes): values.strides[1] == sizeof(%(c_type_out)s) and sizeof(%(c_type_out)s) * n >= 256): - for i from 0 <= i < n: + for i in range(n): idx = indexer[i] if idx == -1: - for j from 0 <= j < k: + for j in range(k): out[i, j] = fv else: v = &values[idx, 0] @@ -85,13 +85,13 @@ def get_dispatch(dtypes): memmove(o, v, (sizeof(%(c_type_out)s) * k)) return - for i from 0 <= i < n: + 
for i in range(n): idx = indexer[i] if idx == -1: - for j from 0 <= j < k: + for j in range(k): out[i, j] = fv else: - for j from 0 <= j < k: + for j in range(k): out[i, j] = %(preval)svalues[idx, j]%(postval)s """ @@ -108,8 +108,8 @@ def get_dispatch(dtypes): fv = fill_value - for i from 0 <= i < n: - for j from 0 <= j < k: + for i in range(n): + for j in range(k): idx = indexer[j] if idx == -1: out[i, j] = fv @@ -246,13 +246,13 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, k = len(idx1) fv = fill_value - for i from 0 <= i < n: + for i in range(n): idx = idx0[i] if idx == -1: - for j from 0 <= j < k: + for j in range(k): out[i, j] = fv else: - for j from 0 <= j < k: + for j in range(k): if idx1[j] == -1: out[i, j] = fv else: diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 521e564447c59..5aea0c65d6dd0 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -161,18 +161,18 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): {{endif}} elif keep == 'first': {{if dtype == 'object'}} - for i from 0 <= i < n: + for i in range(n): kh_put_{{ttype}}(table, values[i], &ret) out[i] = ret == 0 {{else}} with nogil: - for i from 0 <= i < n: + for i in range(n): kh_put_{{ttype}}(table, values[i], &ret) out[i] = ret == 0 {{endif}} else: {{if dtype == 'object'}} - for i from 0 <= i < n: + for i in range(n): value = values[i] k = kh_get_{{ttype}}(table, value) if k != table.n_buckets: @@ -185,7 +185,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): out[i] = 0 {{else}} with nogil: - for i from 0 <= i < n: + for i in range(n): value = values[i] k = kh_get_{{ttype}}(table, value) if k != table.n_buckets: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index d80b5fd2bd0b9..6c971a331fe35 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -37,29 +37,30 @@ cdef int64_t NPY_NAT = 
util.get_nat() from util cimport is_array, is_nan -def values_from_object(object o): +def values_from_object(object obj): """ return my values or the object if we are say an ndarray """ - cdef f + cdef func # TODO: Does declaring this without a type accomplish anything? - f = getattr(o, 'get_values', None) - if f is not None: - o = f() + func = getattr(obj, 'get_values', None) + if func is not None: + obj = func() - return o + return obj @cython.wraparound(False) @cython.boundscheck(False) -def memory_usage_of_objects(ndarray[object, ndim=1] arr): +def memory_usage_of_objects(object[:] arr): """ return the memory usage of an object array in bytes, does not include the actual bytes of the pointers """ - cdef Py_ssize_t i, n - cdef int64_t s = 0 + cdef: + Py_ssize_t i, n + int64_t size = 0 n = len(arr) - for i from 0 <= i < n: - s += arr[i].__sizeof__() - return s + for i in range(n): + size += arr[i].__sizeof__() + return size # ---------------------------------------------------------------------- @@ -138,10 +139,10 @@ def fast_unique_multiple(list arrays): dict table = {} object val, stub = 0 - for i from 0 <= i < k: + for i in range(k): buf = arrays[i] n = len(buf) - for j from 0 <= j < n: + for j in range(n): val = buf[j] if val not in table: table[val] = stub @@ -165,10 +166,10 @@ def fast_unique_multiple_list(list lists, bint sort=True): dict table = {} object val, stub = 0 - for i from 0 <= i < k: + for i in range(k): buf = lists[i] n = len(buf) - for j from 0 <= j < n: + for j in range(n): val = buf[j] if val not in table: table[val] = stub @@ -208,7 +209,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True): for buf in gen: n = len(buf) - for j from 0 <= j < n: + for j in range(n): val = buf[j] if val not in table: table[val] = stub @@ -669,15 +670,15 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, if axis == 0: counts = np.zeros((max_bin, k), dtype='i8') with nogil: - for i from 0 <= i < n: - for j from 0 <= j < k: + for i in 
range(n): + for j in range(k): counts[labels[i], j] += mask[i, j] else: # axis == 1 counts = np.zeros((n, max_bin), dtype='i8') with nogil: - for i from 0 <= i < n: - for j from 0 <= j < k: + for i in range(n): + for j in range(k): counts[i, labels[j]] += mask[i, j] return counts diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 2abd270652433..7f5990ce5d65c 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -148,7 +148,7 @@ cdef class IntIndex(SparseIndex): new_indices = np.empty(min( len(xindices), len(yindices)), dtype=np.int32) - for xi from 0 <= xi < self.npoints: + for xi in range(self.npoints): xind = xindices[xi] while yi < y.npoints and yindices[yi] < xind: @@ -292,7 +292,7 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices): # TODO: two-pass algorithm faster? prev = block = indices[0] - for i from 1 <= i < npoints: + for i in range(1, npoints): cur = indices[i] if cur - prev > 1: # new block @@ -383,21 +383,22 @@ cdef class BlockIndex(SparseIndex): if len(blocs) != len(blengths): raise ValueError('block bound arrays must be same length') - for i from 0 <= i < self.nblocks: + for i in range(self.nblocks): if i > 0: if blocs[i] <= blocs[i - 1]: raise ValueError('Locations not in ascending order') if i < self.nblocks - 1: if blocs[i] + blengths[i] > blocs[i + 1]: - raise ValueError('Block %d overlaps' % i) + raise ValueError('Block {idx} overlaps'.format(idx=i)) else: if blocs[i] + blengths[i] > self.length: - raise ValueError('Block %d extends beyond end' % i) + raise ValueError('Block {idx} extends beyond end' + .format(idx=i)) # no zero-length blocks if blengths[i] == 0: - raise ValueError('Zero-length block %d' % i) + raise ValueError('Zero-length block {idx}'.format(idx=i)) def equals(self, other): if not isinstance(other, BlockIndex): @@ -422,10 +423,10 @@ cdef class BlockIndex(SparseIndex): indices = np.empty(self.npoints, dtype=np.int32) - for b from 0 <= b < self.nblocks: + for b in range(self.nblocks): offset 
= self.locbuf[b] - for j from 0 <= j < self.lenbuf[b]: + for j in range(self.lenbuf[b]): indices[i] = offset + j i += 1 @@ -551,7 +552,7 @@ cdef class BlockIndex(SparseIndex): return -1 cum_len = 0 - for i from 0 <= i < self.nblocks: + for i in range(self.nblocks): if index >= locs[i] and index < locs[i] + lens[i]: return cum_len + index - locs[i] cum_len += lens[i] @@ -579,11 +580,11 @@ cdef class BlockIndex(SparseIndex): if self.npoints == 0: return results - for i from 0 <= i < n: + for i in range(n): ind_val = indexer[i] if not (ind_val < 0 or self.length <= ind_val): cum_len = 0 - for j from 0 <= j < self.nblocks: + for j in range(self.nblocks): if ind_val >= locs[j] and ind_val < locs[j] + lens[j]: results[i] = cum_len + ind_val - locs[j] cum_len += lens[j] @@ -824,7 +825,7 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map): # out = np.empty(length, dtype=np.float64) -# for i from 0 <= i < length: +# for i in range(length): # if indexer[i] == -1: # pass diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index d1d9a6f02a72c..2843a3cf7dd28 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -190,7 +190,7 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_, # Wow, what a hack job. Need to do something about this # walk the two SparseVectors, adding matched locations... - for out_i from 0 <= out_i < out_index.npoints: + for out_i in range(out_index.npoints): if yblock == yindex.nblocks: # use y fill value out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} @@ -286,7 +286,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex, out_indices = out_index.indices # walk the two SparseVectors, adding matched locations... 
- for out_i from 0 <= out_i < out_index.npoints: + for out_i in range(out_index.npoints): if xi == xindex.npoints: # use x fill value out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 7e9e8b720872d..70a3f3f410636 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False import re cimport numpy as cnp diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2efe506d2c154..9b98248818847 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False cimport cython from cython cimport Py_ssize_t @@ -31,17 +30,6 @@ from np_datetime cimport (npy_datetimestruct, # Constants -class WeekDay(object): - # TODO: Remove: This is not used outside of tests - MON = 0 - TUE = 1 - WED = 2 - THU = 3 - FRI = 4 - SAT = 5 - SUN = 6 - - _offset_to_period_map = { 'WEEKDAY': 'D', 'EOM': 'M', diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a99b1a5e1b4ef..1d7b1eec745ab 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -95,6 +95,7 @@ cdef extern from *: {0, 0, 0, 0, 0, 0, 1}}; """ int64_t daytime_conversion_factor_matrix[7][7] + # TODO: Can we get these frequencies from frequencies.FreqGroup? 
int FR_ANN int FR_QTR int FR_MTH diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 659afd152106d..866c9ca9d3ac7 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -50,18 +50,6 @@ int is_leapyear(npy_int64 year) { ((year % 100) != 0 || (year % 400) == 0); } -/* - * Sakamoto's method, from wikipedia - */ -int dayofweek(int y, int m, int d) { - int day; - static const int t[] = {0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4}; - y -= m < 3; - day = (y + y / 4 - y / 100 + y / 400 + t[m - 1] + d) % 7; - // convert to python day - return (day + 6) % 7; -} - /* * Adjusts a datetimestruct based on a minutes offset. Assumes * the current values are valid.g diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 3974d5083f51b..549d38409ca83 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -48,8 +48,6 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta val, NPY_DATETIMEUNIT fr, pandas_timedeltastruct *result); -int dayofweek(int y, int m, int d); - extern const int days_per_month_table[2][12]; // stuff numpy-derived code needs in header diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index ccd418a69c827..c5b821e089795 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -13,12 +13,22 @@ from pandas.tseries.frequencies import get_offset from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG from pandas.tseries.offsets import FY5253Quarter, FY5253 -from pandas._libs.tslibs.offsets import WeekDay from .common import assert_offset_equal, assert_onOffset from .test_offsets import Base +class WeekDay(object): + # TODO: Remove: This is not used outside of tests + MON = 0 + TUE = 1 + WED = 2 + THU = 3 + FRI = 4 + SAT = 
5 + SUN = 6 + + def makeFY5253LastOfMonthQuarter(*args, **kwds): return FY5253Quarter(*args, variation="last", **kwds) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 57b9a281ac0eb..bca1a276260f1 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -17,7 +17,7 @@ from pandas.core.indexes.datetimes import ( _to_m8, DatetimeIndex, _daterange_cache) import pandas._libs.tslibs.offsets as liboffsets -from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset +from pandas._libs.tslibs.offsets import CacheableOffset from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, BusinessHour, WeekOfMonth, CBMonthEnd, CustomBusinessHour, @@ -39,6 +39,17 @@ from .common import assert_offset_equal, assert_onOffset +class WeekDay(object): + # TODO: Remove: This is not used outside of tests + MON = 0 + TUE = 1 + WED = 2 + THU = 3 + FRI = 4 + SAT = 5 + SUN = 6 + + #### # Misc function tests #### From 8ca4e51cf931f90b602b000af9c03574a9ab7785 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 14:50:19 -0700 Subject: [PATCH 09/16] deduplicate --- pandas/tests/tseries/offsets/test_fiscal.py | 13 +------------ pandas/tests/tseries/offsets/test_offsets.py | 1 + 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index c5b821e089795..223298dc42544 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -15,18 +15,7 @@ from pandas.tseries.offsets import FY5253Quarter, FY5253 from .common import assert_offset_equal, assert_onOffset -from .test_offsets import Base - - -class WeekDay(object): - # TODO: Remove: This is not used outside of tests - MON = 0 - TUE = 1 - WED = 2 - THU = 3 - FRI = 4 - SAT = 5 - SUN = 6 +from .test_offsets import Base, WeekDay def 
makeFY5253LastOfMonthQuarter(*args, **kwds): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index bca1a276260f1..e95f1ba11ad5c 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -39,6 +39,7 @@ from .common import assert_offset_equal, assert_onOffset + class WeekDay(object): # TODO: Remove: This is not used outside of tests MON = 0 From 6fc5320109282a5c8e0ccfaa4899fbd91912afd1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 14:59:06 -0700 Subject: [PATCH 10/16] update string formatting, cdef where possible --- pandas/_libs/tslibs/parsing.pyx | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index afda2046fd12d..19157f0ea66d7 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# cython: profile=False """ Parsing functions for datetime and datetime-like strings. 
""" @@ -133,15 +132,22 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): return res -def parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, - yearfirst=False, **kwargs): +cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, + yearfirst=False, **kwargs): """parse datetime string, only returns datetime Returns ------- - datetime + parsed : datetime + parsed2 : datetime/dateutil.parser._result + reso : str + inferred resolution + + Raises + ------ + ValueError : preliminary check suggests string is not datetime + DateParseError : error within dateutil """ - cdef: object parsed, reso @@ -162,7 +168,7 @@ def parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, # TODO: allow raise of errors within instead raise DateParseError(e) if parsed is None: - raise DateParseError("Could not parse %s" % date_string) + raise DateParseError("Could not parse {dstr}".format(dstr=date_string)) return parsed, parsed, reso @@ -244,8 +250,8 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, if not (1 <= quarter <= 4): msg = ('Incorrect quarterly string is given, quarter must be ' - 'between 1 and 4: {0}') - raise DateParseError(msg.format(date_string)) + 'between 1 and 4: {dstr}') + raise DateParseError(msg.format(dstr=date_string)) if freq is not None: # hack attack, #1228 @@ -253,7 +259,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, mnum = MONTH_NUMBERS[_get_rule_month(freq)] + 1 except (KeyError, ValueError): msg = ('Unable to retrieve month information from given ' - 'freq: {0}').format(freq) + 'freq: {freq}'.format(freq=freq)) raise DateParseError(msg) month = (mnum + (quarter - 1) * 3) % 12 + 1 @@ -290,8 +296,8 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, raise ValueError('Unable to parse {0}'.format(date_string)) -def dateutil_parse(object timestr, object default, ignoretz=False, - tzinfos=None, 
**kwargs): +cdef dateutil_parse(object timestr, object default, ignoretz=False, + tzinfos=None, **kwargs): """ lifted from dateutil to get resolution""" cdef: @@ -307,8 +313,8 @@ def dateutil_parse(object timestr, object default, ignoretz=False, res, _ = res if res is None: - msg = "Unknown datetime string format, unable to parse: {0}" - raise ValueError(msg.format(timestr)) + msg = "Unknown datetime string format, unable to parse: {timestr}" + raise ValueError(msg.format(timestr=timestr)) for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: @@ -318,8 +324,8 @@ def dateutil_parse(object timestr, object default, ignoretz=False, reso = attr if reso is None: - msg = "Unable to parse datetime string: {0}" - raise ValueError(msg.format(timestr)) + msg = "Unable to parse datetime string: {timestr}" + raise ValueError(msg.format(timestr=timestr)) if reso == 'microsecond': if repl['microsecond'] == 0: From ac5be3c4639aff3e8ef3af48e27d0948e756542e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 15:00:16 -0700 Subject: [PATCH 11/16] optimize stringy typechecking --- pandas/_libs/tslibs/parsing.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 19157f0ea66d7..07163ddc9afe9 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -5,8 +5,8 @@ Parsing functions for datetime and datetime-like strings. 
import sys import re -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython.datetime cimport datetime @@ -16,10 +16,8 @@ import numpy as np # Avoid import from outside _libs if sys.version_info.major == 2: - string_types = basestring from StringIO import StringIO else: - string_types = str from io import StringIO @@ -113,7 +111,9 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): ------- datetime, datetime/dateutil.parser._result, str """ - if not isinstance(arg, string_types): + if not isinstance(arg, (str, unicode)): + # Note: cython recognizes `unicode` in both py2/py3, optimizes + # this check into a C call. return arg if getattr(freq, "_typ", None) == "dateoffset": @@ -197,7 +197,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, int year, quarter = -1, month, mnum, date_len # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 - assert isinstance(date_string, string_types) + assert isinstance(date_string, (str, unicode)) # len(date_string) == 0 # should be NaT??? 
@@ -344,7 +344,7 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata - elif isinstance(tzdata, string_types): + elif isinstance(tzdata, (str, unicode)): tzinfo = _dateutil_tzstr(tzdata) elif isinstance(tzdata, int): tzinfo = tzoffset(res.tzname, tzdata) @@ -582,7 +582,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse, if dt_str_parse is None or dt_str_split is None: return None - if not isinstance(dt_str, string_types): + if not isinstance(dt_str, (str, unicode)): return None day_attribute_and_format = (('day',), '%d', 2) From 03af64e69a7f2992e70780ef46592963124d79d2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 15:08:54 -0700 Subject: [PATCH 12/16] make kwargs explicit --- pandas/_libs/tslibs/parsing.pyx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 07163ddc9afe9..efaf62da71d20 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -133,7 +133,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, - yearfirst=False, **kwargs): + yearfirst=False): """parse datetime string, only returns datetime Returns @@ -163,7 +163,8 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False, try: parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst) + dayfirst=dayfirst, yearfirst=yearfirst, + ignoretz=False, tzinfos=None) except Exception as e: # TODO: allow raise of errors within instead raise DateParseError(e) @@ -297,7 +298,7 @@ cdef inline object _parse_dateabbr_string(object date_string, object default, cdef dateutil_parse(object timestr, object default, ignoretz=False, - tzinfos=None, **kwargs): + tzinfos=None, dayfirst=None, 
yearfirst=None): """ lifted from dateutil to get resolution""" cdef: @@ -306,7 +307,7 @@ cdef dateutil_parse(object timestr, object default, ignoretz=False, dict repl = {} fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(fobj, **kwargs) + res = DEFAULTPARSER._parse(fobj, dayfirst=dayfirst, yearfirst=yearfirst) # dateutil 2.2 compat if isinstance(res, tuple): # PyTuple_Check From 0bfe72da544850ed0b2616f7535becb5a19fc57c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 17:13:04 -0700 Subject: [PATCH 13/16] fixup typo --- pandas/_libs/lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6c971a331fe35..7757ecf38f585 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -139,7 +139,7 @@ def fast_unique_multiple(list arrays): dict table = {} object val, stub = 0 - for i in range(n): + for i in range(k): buf = arrays[i] n = len(buf) for j in range(n): From 3fafcbfc039bdccab52923c67156aca042e60411 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 8 Aug 2018 10:31:01 -0700 Subject: [PATCH 14/16] rename helper.h --> inline_helper.h --- pandas/_libs/src/compat_helper.h | 2 +- pandas/_libs/src/{helper.h => inline_helper.h} | 0 pandas/_libs/src/klib/khash.h | 2 +- pandas/_libs/src/numpy_helper.h | 2 +- pandas/_libs/src/parse_helper.h | 2 +- pandas/_libs/src/parser/tokenizer.h | 2 +- pandas/_libs/src/skiplist.h | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename pandas/_libs/src/{helper.h => inline_helper.h} (100%) diff --git a/pandas/_libs/src/compat_helper.h b/pandas/_libs/src/compat_helper.h index 116cd91070a60..462f53392adee 100644 --- a/pandas/_libs/src/compat_helper.h +++ b/pandas/_libs/src/compat_helper.h @@ -11,7 +11,7 @@ The full license is in the LICENSE file, distributed with this software. 
#define PANDAS__LIBS_SRC_COMPAT_HELPER_H_ #include "Python.h" -#include "helper.h" +#include "inline_helper.h" /* PySlice_GetIndicesEx changes signature in PY3 diff --git a/pandas/_libs/src/helper.h b/pandas/_libs/src/inline_helper.h similarity index 100% rename from pandas/_libs/src/helper.h rename to pandas/_libs/src/inline_helper.h diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index f034c1d619216..77ec519cc24da 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -112,7 +112,7 @@ int main() { #include #include #include -#include "../helper.h" +#include "../inline_helper.h" #if UINT_MAX == 0xffffffffu diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 753cba6ce62aa..d44334906901a 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -11,7 +11,7 @@ The full license is in the LICENSE file, distributed with this software. #define PANDAS__LIBS_SRC_NUMPY_HELPER_H_ #include "Python.h" -#include "helper.h" +#include "inline_helper.h" #include "numpy/arrayobject.h" #include "numpy/arrayscalars.h" diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index d17d9166ea3ee..4f9f825b15ffe 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -12,7 +12,7 @@ The full license is in the LICENSE file, distributed with this software. 
#include #include -#include "helper.h" +#include "inline_helper.h" #include "headers/portable.h" static double xstrtod(const char *p, char **q, char decimal, char sci, diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index f52ec81234423..9fc3593aaaf5b 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -27,7 +27,7 @@ See LICENSE for the license #define ERROR_INVALID_CHARS 3 #include "../headers/stdint.h" -#include "../helper.h" +#include "../inline_helper.h" #include "khash.h" diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h index 6d15f291ceb8b..60c1a56727777 100644 --- a/pandas/_libs/src/skiplist.h +++ b/pandas/_libs/src/skiplist.h @@ -20,7 +20,7 @@ Python recipe (http://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) #include #include #include -#include "helper.h" +#include "inline_helper.h" PANDAS_INLINE float __skiplist_nanf(void) { const union { From cc0f9645b25441e2bcba3671b825d4bd7a7d3a53 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 8 Aug 2018 12:28:12 -0700 Subject: [PATCH 15/16] C lint fixup --- pandas/_libs/src/inline_helper.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/inline_helper.h b/pandas/_libs/src/inline_helper.h index 26b4d033b963b..397ec8e7b2cb8 100644 --- a/pandas/_libs/src/inline_helper.h +++ b/pandas/_libs/src/inline_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS__LIBS_SRC_HELPER_H_ -#define PANDAS__LIBS_SRC_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_INLINE_HELPER_H_ +#define PANDAS__LIBS_SRC_INLINE_HELPER_H_ #ifndef PANDAS_INLINE #if defined(__GNUC__) @@ -22,4 +22,4 @@ The full license is in the LICENSE file, distributed with this software. 
#endif #endif -#endif // PANDAS__LIBS_SRC_HELPER_H_ +#endif // PANDAS__LIBS_SRC_INLINE_HELPER_H_ From e1842aae647ccc73c9ee61de4d027344699a9e40 Mon Sep 17 00:00:00 2001 From: Chris Bartak Date: Thu, 9 Aug 2018 15:31:53 -0500 Subject: [PATCH 16/16] some typedefs for MSVC 2008 --- pandas/_libs/tslibs/period.pyx | 3 ++- pandas/_libs/tslibs/util.pxd | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 1d7b1eec745ab..f68b6d8fdef57 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -85,7 +85,8 @@ cdef extern from *: #define FR_NS 12000 /* Nanosecondly */ #define FR_UND -10000 /* Undefined */ - static int64_t daytime_conversion_factor_matrix[7][7] = { + // must use npy typedef b/c int64_t is aliased in cython-generated c + static npy_int64 daytime_conversion_factor_matrix[7][7] = { {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, {0, 1, 60, 3600, 3600000, 3600000000, 3600000000000}, {0, 0, 1, 60, 60000, 60000000, 60000000000}, diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index ada6cc6283401..0ba61fcc58f46 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -24,10 +24,10 @@ cdef extern from "Python.h": bint PyComplex_Check(object obj) nogil bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil +from numpy cimport int64_t cdef extern from "numpy/arrayobject.h": PyTypeObject PyFloatingArrType_Type - ctypedef signed long long int64_t int _import_array() except -1 cdef extern from "numpy/ndarrayobject.h":