diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index e30a91ae3e10a..6680ab1049d12 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -542,6 +542,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, datetime dt ndarray[int64_t] trans int64_t[:] deltas + Py_ssize_t pos value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -562,7 +563,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, if typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + obj.fold = _infer_tsobject_fold(obj.value, trans, deltas, pos) # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, @@ -732,32 +733,35 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz): if is_fixed_offset(tz): # static/fixed tzinfo; in this case we know len(deltas) == 1 # This can come back with `typ` of either "fixed" or None - dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) - elif typ == 'pytz': - # i.e. treat_tz_as_pytz(tz) - pos = trans.searchsorted(obj.value, side='right') - 1 - tz = tz._tzinfos[tz._transition_info[pos]] - dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - elif typ == 'dateutil': - # i.e. treat_tz_as_dateutil(tz) - pos = trans.searchsorted(obj.value, side='right') - 1 - dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - # dateutil supports fold, so we infer fold from value - obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + local_val = obj.value + deltas[0] + else: - # Note: as of 2018-07-17 all tzinfo objects that are _not_ - # either pytz or dateutil have is_fixed_offset(tz) == True, - # so this branch will never be reached. - pass + pos = trans.searchsorted(obj.value, side='right') - 1 + local_val = obj.value + deltas[pos] + + if typ == 'pytz': + # i.e. 
treat_tz_as_pytz(tz) + tz = tz._tzinfos[tz._transition_info[pos]] + elif typ == 'dateutil': + # i.e. treat_tz_as_dateutil(tz) + # dateutil supports fold, so we infer fold from value + obj.fold = _infer_tsobject_fold(obj.value, trans, deltas, pos) + else: + # Note: as of 2018-07-17 all tzinfo objects that are _not_ + # either pytz or dateutil have is_fixed_offset(tz) == True, + # so this branch will never be reached. + pass + + dt64_to_dtstruct(local_val, &obj.dts) obj.tzinfo = tz cdef inline bint _infer_tsobject_fold( - _TSObject obj, + int64_t value, const int64_t[:] trans, const int64_t[:] deltas, - intp_t pos, + Py_ssize_t pos, ): """ Infer _TSObject fold property from value by assuming 0 and then setting @@ -765,12 +769,13 @@ cdef inline bint _infer_tsobject_fold( Parameters ---------- + value : int64_t obj : _TSObject trans : ndarray[int64_t] ndarray of offset transition points in nanoseconds since epoch. deltas : int64_t[:] array of offsets corresponding to transition points in trans. - pos : intp_t + pos : Py_ssize_t Position of the last transition point before taking fold into account. Returns @@ -791,7 +796,7 @@ cdef inline bint _infer_tsobject_fold( if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: + if value - fold_delta < trans[pos]: fold = 1 return fold diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 3666d00707ac8..82e3c87abbbd6 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -1,5 +1,9 @@ from cpython.datetime cimport tzinfo -from numpy cimport int64_t +from numpy cimport ( + int64_t, + intp_t, + ndarray, +) cdef int64_t tz_convert_utc_to_tzlocal( @@ -9,3 +13,23 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz) cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* ) except? 
-1 + + +cdef class Localizer: + cdef: + tzinfo tz + bint use_utc + bint use_fixed + bint use_tzlocal + bint use_dst + bint use_pytz + ndarray trans + int64_t[::1] deltas + int64_t delta + str typ + + cdef intp_t prepare1(self, int64_t utc_val) + cdef ndarray[intp_t] prepare(self, const int64_t[:] stamps) + cdef inline int64_t utc_val_to_local_val( + self, int64_t utc_val, intp_t* pos, Py_ssize_t i + ) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 4dbfabad5dc84..02ae53580c2c6 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -399,26 +399,15 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): converted: int64 """ cdef: - int64_t delta - int64_t[:] deltas - ndarray[int64_t, ndim=1] trans intp_t pos + Localizer info = Localizer(tz) if val == NPY_NAT: return val - if is_utc(tz): - return val - elif is_tzlocal(tz): - return _tz_convert_tzlocal_utc(val, tz, to_utc=False) - elif is_fixed_offset(tz): - _, deltas, _ = get_dst_info(tz) - delta = deltas[0] - return val + delta - else: - trans, deltas, _ = get_dst_info(tz) - pos = trans.searchsorted(val, side="right") - 1 - return val + deltas[pos] + pos = info.prepare1(val) + + return info.utc_val_to_local_val(val, &pos, 0) def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): @@ -441,7 +430,7 @@ def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): return np.array([], dtype=np.int64) converted = _tz_convert_from_utc(vals, tz) - return np.array(converted, dtype=np.int64) + return np.asarray(converted, dtype=np.int64) @cython.boundscheck(False) @@ -460,53 +449,29 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): converted : ndarray[int64_t] """ cdef: - int64_t[:] converted, deltas + int64_t[:] converted Py_ssize_t i, n = len(vals) - int64_t val, delta - intp_t[:] pos - ndarray[int64_t] trans - str typ + int64_t val + ndarray[intp_t] pos_ + intp_t* pos + Localizer info = 
Localizer(tz) - if is_utc(tz): - return vals - elif is_tzlocal(tz): - converted = np.empty(n, dtype=np.int64) - for i in range(n): - val = vals[i] - if val == NPY_NAT: - converted[i] = NPY_NAT - else: - converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False) - else: - converted = np.empty(n, dtype=np.int64) + if info.use_utc: + # fastpath + return vals.copy() - trans, deltas, typ = get_dst_info(tz) + converted = np.empty(n, dtype=np.int64) - if typ not in ["pytz", "dateutil"]: - # FixedOffset, we know len(deltas) == 1 - delta = deltas[0] + pos_ = info.prepare(vals) + pos = cnp.PyArray_DATA(pos_) - for i in range(n): - val = vals[i] - if val == NPY_NAT: - converted[i] = val - else: - converted[i] = val + delta - - else: - pos = trans.searchsorted(vals, side="right") - 1 - - for i in range(n): - val = vals[i] - if val == NPY_NAT: - converted[i] = val - else: - if pos[i] < 0: - # TODO: How is this reached? Should we be checking for - # it elsewhere? - raise ValueError("First time before start of DST info") + for i in range(n): + val = vals[i] + if val == NPY_NAT: + converted[i] = NPY_NAT + continue - converted[i] = val + deltas[pos[i]] + converted[i] = info.utc_val_to_local_val(val, pos, i) return converted @@ -566,3 +531,73 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True, return val - delta else: return val + delta + + +@cython.freelist(16) +cdef class Localizer: + # cdef: + # tzinfo tz + # bint use_utc + # bint use_fixed + # bint use_tzlocal + # bint use_pytz + # bint use_dst + # ndarray trans + # int64_t[::1] deltas + # int64_t delta + # str typ + + @cython.boundscheck(False) + def __cinit__(self, tzinfo tz): + self.tz = tz + if is_utc(tz) or tz is None: + self.use_utc = True + elif is_tzlocal(tz): + self.use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + self.trans = trans + self.deltas = deltas + self.typ = typ + + if typ not in ["pytz", "dateutil"]: + # static/fixed; in this case we know that len(deltas) == 1 
+ self.use_fixed = True + self.delta = deltas[0] + else: + self.use_dst = True + if typ == "pytz": + self.use_pytz = True + + cdef intp_t prepare1(self, int64_t utc_val): + if self.use_dst: + return self.trans.searchsorted(utc_val, side="right") - 1 + return 0 # won't be used + + cdef ndarray[intp_t] prepare(self, const int64_t[:] stamps): + if self.use_dst: + + return self.trans.searchsorted(stamps, side="right") - 1 + return placeholder # won't be used + + @cython.boundscheck(False) + cdef inline int64_t utc_val_to_local_val( + self, int64_t utc_val, intp_t* pos, Py_ssize_t i + ): + cdef: + int64_t local_val + + if self.use_utc: + local_val = utc_val + elif self.use_tzlocal: + local_val = _tz_convert_tzlocal_utc(utc_val, self.tz, to_utc=False) + elif self.use_fixed: + local_val = utc_val + self.delta + else: + local_val = utc_val + self.deltas[pos[i]] + + return local_val + + +# Placeholder to return from 'prepare' +cdef ndarray placeholder = np.array([], dtype=np.intp) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 17720de33ab33..514f58df2879f 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -1,5 +1,6 @@ import cython +cimport numpy as cnp from cpython.datetime cimport ( date, datetime, @@ -15,6 +16,8 @@ from numpy cimport ( ndarray, ) +cnp.import_array() + from .conversion cimport normalize_i8_stamp from .dtypes import Resolution @@ -30,12 +33,7 @@ from .np_datetime cimport ( from .offsets cimport BaseOffset from .period cimport get_period_ordinal from .timestamps cimport create_timestamp_from_ts -from .timezones cimport ( - get_dst_info, - is_tzlocal, - is_utc, -) -from .tzconversion cimport tz_convert_utc_to_tzlocal +from .tzconversion cimport Localizer # ------------------------------------------------------------------------- @@ -120,17 +118,14 @@ def ints_to_pydatetime( """ cdef: Py_ssize_t i, n = len(stamps) - ndarray[int64_t] trans - int64_t[:] deltas - intp_t[:] pos + 
ndarray[intp_t] pos_ + intp_t* pos npy_datetimestruct dts - object dt, new_tz - str typ - int64_t value, local_val, delta = NPY_NAT # dummy for delta + object new_tz + int64_t value, local_val ndarray[object] result = np.empty(n, dtype=object) object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint) - bint use_utc = False, use_tzlocal = False, use_fixed = False - bint use_pytz = False + Localizer info = Localizer(tz) if box == "date": assert (tz is None), "tz should be None when converting to date" @@ -147,19 +142,8 @@ def ints_to_pydatetime( "box must be one of 'datetime', 'date', 'time' or 'timestamp'" ) - if is_utc(tz) or tz is None: - use_utc = True - elif is_tzlocal(tz): - use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - if typ not in ["pytz", "dateutil"]: - # static/fixed; in this case we know that len(delta) == 1 - use_fixed = True - delta = deltas[0] - else: - pos = trans.searchsorted(stamps, side="right") - 1 - use_pytz = typ == "pytz" + pos_ = info.prepare(stamps) + pos = cnp.PyArray_DATA(pos_) for i in range(n): new_tz = tz @@ -167,26 +151,16 @@ def ints_to_pydatetime( if value == NPY_NAT: result[i] = NaT - else: - if use_utc: - local_val = value - elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(value, tz) - elif use_fixed: - local_val = value + delta - elif not use_pytz: - # i.e. dateutil - # no zone-name change for dateutil tzs - dst etc - # represented in single object. 
- local_val = value + deltas[pos[i]] - else: - # pytz - # find right representation of dst etc in pytz timezone - new_tz = tz._tzinfos[tz._transition_info[pos[i]]] - local_val = value + deltas[pos[i]] - - dt64_to_dtstruct(local_val, &dts) - result[i] = func_create(value, dts, new_tz, freq, fold) + continue + + local_val = info.utc_val_to_local_val(value, pos, i) + + if info.use_pytz: + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos[i]]] + + dt64_to_dtstruct(local_val, &dts) + result[i] = func_create(value, dts, new_tz, freq, fold) return result @@ -220,42 +194,26 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): return RESO_DAY +@cython.wraparound(False) +@cython.boundscheck(False) def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: cdef: Py_ssize_t i, n = len(stamps) npy_datetimestruct dts int reso = RESO_DAY, curr_reso - ndarray[int64_t] trans - int64_t[:] deltas - intp_t[:] pos - int64_t local_val, delta = NPY_NAT - bint use_utc = False, use_tzlocal = False, use_fixed = False - - if is_utc(tz) or tz is None: - use_utc = True - elif is_tzlocal(tz): - use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - if typ not in ["pytz", "dateutil"]: - # static/fixed; in this case we know that len(delta) == 1 - use_fixed = True - delta = deltas[0] - else: - pos = trans.searchsorted(stamps, side="right") - 1 + ndarray[intp_t] pos_ + intp_t* pos + int64_t local_val + Localizer info = Localizer(tz) + + pos_ = info.prepare(stamps) + pos = cnp.PyArray_DATA(pos_) for i in range(n): if stamps[i] == NPY_NAT: continue - if use_utc: - local_val = stamps[i] - elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) - elif use_fixed: - local_val = stamps[i] + delta - else: - local_val = stamps[i] + deltas[pos[i]] + local_val = info.utc_val_to_local_val(stamps[i], pos, i) dt64_to_dtstruct(local_val, &dts) curr_reso = _reso_stamp(&dts) @@ -287,39 +245,20 @@ cpdef 
ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t cdef: Py_ssize_t i, n = len(stamps) int64_t[:] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] trans - int64_t[:] deltas - str typ - Py_ssize_t[:] pos - int64_t local_val, delta = NPY_NAT - bint use_utc = False, use_tzlocal = False, use_fixed = False - - if is_utc(tz) or tz is None: - use_utc = True - elif is_tzlocal(tz): - use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - if typ not in ["pytz", "dateutil"]: - # static/fixed; in this case we know that len(delta) == 1 - use_fixed = True - delta = deltas[0] - else: - pos = trans.searchsorted(stamps, side="right") - 1 + ndarray[intp_t] pos_ + intp_t* pos + int64_t local_val + Localizer info = Localizer(tz) + + pos_ = info.prepare(stamps) + pos = cnp.PyArray_DATA(pos_) for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT continue - if use_utc: - local_val = stamps[i] - elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) - elif use_fixed: - local_val = stamps[i] + delta - else: - local_val = stamps[i] + deltas[pos[i]] + local_val = info.utc_val_to_local_val(stamps[i], pos, i) result[i] = normalize_i8_stamp(local_val) @@ -345,36 +284,17 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: """ cdef: Py_ssize_t i, n = len(stamps) - ndarray[int64_t] trans - int64_t[:] deltas - intp_t[:] pos - int64_t local_val, delta = NPY_NAT - str typ + int64_t local_val int64_t day_nanos = 24 * 3600 * 1_000_000_000 - bint use_utc = False, use_tzlocal = False, use_fixed = False + ndarray[intp_t] pos_ + intp_t* pos + Localizer info = Localizer(tz) - if is_utc(tz) or tz is None: - use_utc = True - elif is_tzlocal(tz): - use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - if typ not in ["pytz", "dateutil"]: - # static/fixed; in this case we know that len(delta) == 1 - use_fixed = True - delta = deltas[0] - else: - pos = trans.searchsorted(stamps, side="right") - 1 
+ pos_ = info.prepare(stamps) + pos = cnp.PyArray_DATA(pos_) for i in range(n): - if use_utc: - local_val = stamps[i] - elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) - elif use_fixed: - local_val = stamps[i] + delta - else: - local_val = stamps[i] + deltas[pos[i]] + local_val = info.utc_val_to_local_val(stamps[i], pos, i) if local_val % day_nanos != 0: return False @@ -391,39 +311,21 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): cdef: Py_ssize_t i, n = len(stamps) int64_t[:] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] trans - int64_t[:] deltas - Py_ssize_t[:] pos + ndarray[intp_t] pos_ + intp_t* pos npy_datetimestruct dts - int64_t local_val, delta = NPY_NAT - bint use_utc = False, use_tzlocal = False, use_fixed = False + int64_t local_val + Localizer info = Localizer(tz) - if is_utc(tz) or tz is None: - use_utc = True - elif is_tzlocal(tz): - use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - if typ not in ["pytz", "dateutil"]: - # static/fixed; in this case we know that len(delta) == 1 - use_fixed = True - delta = deltas[0] - else: - pos = trans.searchsorted(stamps, side="right") - 1 + pos_ = info.prepare(stamps) + pos = cnp.PyArray_DATA(pos_) for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT continue - if use_utc: - local_val = stamps[i] - elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) - elif use_fixed: - local_val = stamps[i] + delta - else: - local_val = stamps[i] + deltas[pos[i]] + local_val = info.utc_val_to_local_val(stamps[i], pos, i) dt64_to_dtstruct(local_val, &dts) result[i] = get_period_ordinal(&dts, freq)