Skip to content

POC/REF: de-duplicate utc->local code #46246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 26 additions & 21 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
datetime dt
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t pos

value = dtstruct_to_dt64(&dts)
obj.dts = dts
Expand All @@ -562,7 +563,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,

if typ == 'dateutil':
pos = trans.searchsorted(obj.value, side='right') - 1
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
obj.fold = _infer_tsobject_fold(obj.value, trans, deltas, pos)

# Keep the converter same as PyDateTime's
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
Expand Down Expand Up @@ -732,45 +733,49 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
if is_fixed_offset(tz):
# static/fixed tzinfo; in this case we know len(deltas) == 1
# This can come back with `typ` of either "fixed" or None
dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
elif typ == 'pytz':
# i.e. treat_tz_as_pytz(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
tz = tz._tzinfos[tz._transition_info[pos]]
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
elif typ == 'dateutil':
# i.e. treat_tz_as_dateutil(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
# dateutil supports fold, so we infer fold from value
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
local_val = obj.value + deltas[0]

else:
# Note: as of 2018-07-17 all tzinfo objects that are _not_
# either pytz or dateutil have is_fixed_offset(tz) == True,
# so this branch will never be reached.
pass
pos = trans.searchsorted(obj.value, side='right') - 1
local_val = obj.value + deltas[pos]

if typ == 'pytz':
# i.e. treat_tz_as_pytz(tz)
tz = tz._tzinfos[tz._transition_info[pos]]
elif typ == 'dateutil':
# i.e. treat_tz_as_dateutil(tz)
# dateutil supports fold, so we infer fold from value
obj.fold = _infer_tsobject_fold(obj.value, trans, deltas, pos)
else:
# Note: as of 2018-07-17 all tzinfo objects that are _not_
# either pytz or dateutil have is_fixed_offset(tz) == True,
# so this branch will never be reached.
pass

dt64_to_dtstruct(local_val, &obj.dts)

obj.tzinfo = tz


cdef inline bint _infer_tsobject_fold(
_TSObject obj,
int64_t value,
const int64_t[:] trans,
const int64_t[:] deltas,
intp_t pos,
Py_ssize_t pos,
):
"""
Infer _TSObject fold property from value by assuming 0 and then setting
to 1 if necessary.

Parameters
----------
value : int64_t
obj : _TSObject
trans : ndarray[int64_t]
ndarray of offset transition points in nanoseconds since epoch.
deltas : int64_t[:]
array of offsets corresponding to transition points in trans.
pos : intp_t
pos : Py_ssize_t
Position of the last transition point before taking fold into account.

Returns
Expand All @@ -791,7 +796,7 @@ cdef inline bint _infer_tsobject_fold(

if pos > 0:
fold_delta = deltas[pos - 1] - deltas[pos]
if obj.value - fold_delta < trans[pos]:
if value - fold_delta < trans[pos]:
fold = 1

return fold
Expand Down
26 changes: 25 additions & 1 deletion pandas/_libs/tslibs/tzconversion.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from cpython.datetime cimport tzinfo
from numpy cimport int64_t
from numpy cimport (
int64_t,
intp_t,
ndarray,
)


cdef int64_t tz_convert_utc_to_tzlocal(
Expand All @@ -9,3 +13,23 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz)
cdef int64_t tz_localize_to_utc_single(
int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*
) except? -1


# C-level declaration of the Localizer extension type; the implementation
# lives in tzconversion.pyx.
cdef class Localizer:
cdef:
# The tzinfo this instance was constructed for.
tzinfo tz
# Strategy flags assigned in __cinit__. Exactly one of use_utc, use_tzlocal,
# use_fixed, use_dst is set; use_pytz is set in addition to use_dst when
# get_dst_info reports typ == "pytz".
bint use_utc
bint use_fixed
bint use_tzlocal
bint use_dst
bint use_pytz
# trans, deltas and typ hold the three values returned by get_dst_info(tz);
# they are only assigned when tz is neither UTC/None nor tzlocal.
ndarray trans
int64_t[::1] deltas
# deltas[0]; only assigned in the fixed-offset case.
int64_t delta
str typ

# Position lookup for a single UTC value (0 when not in the DST case).
cdef intp_t prepare1(self, int64_t utc_val)
# Position lookup for an array of UTC values.
cdef ndarray[intp_t] prepare(self, const int64_t[:] stamps)
# Convert one UTC value to a local value; pos[i] is only read in the DST case.
cdef inline int64_t utc_val_to_local_val(
self, int64_t utc_val, intp_t* pos, Py_ssize_t i
)
149 changes: 92 additions & 57 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -399,26 +399,15 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz):
converted: int64
"""
cdef:
int64_t delta
int64_t[:] deltas
ndarray[int64_t, ndim=1] trans
intp_t pos
Localizer info = Localizer(tz)

if val == NPY_NAT:
return val

if is_utc(tz):
return val
elif is_tzlocal(tz):
return _tz_convert_tzlocal_utc(val, tz, to_utc=False)
elif is_fixed_offset(tz):
_, deltas, _ = get_dst_info(tz)
delta = deltas[0]
return val + delta
else:
trans, deltas, _ = get_dst_info(tz)
pos = trans.searchsorted(val, side="right") - 1
return val + deltas[pos]
pos = info.prepare1(val)

return info.utc_val_to_local_val(val, &pos, 0)


def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
Expand All @@ -441,7 +430,7 @@ def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
return np.array([], dtype=np.int64)

converted = _tz_convert_from_utc(vals, tz)
return np.array(converted, dtype=np.int64)
return np.asarray(converted, dtype=np.int64)


@cython.boundscheck(False)
Expand All @@ -460,53 +449,29 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
converted : ndarray[int64_t]
"""
cdef:
int64_t[:] converted, deltas
int64_t[:] converted
Py_ssize_t i, n = len(vals)
int64_t val, delta
intp_t[:] pos
ndarray[int64_t] trans
str typ
int64_t val
ndarray[intp_t] pos_
intp_t* pos
Localizer info = Localizer(tz)

if is_utc(tz):
return vals
elif is_tzlocal(tz):
converted = np.empty(n, dtype=np.int64)
for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = NPY_NAT
else:
converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False)
else:
converted = np.empty(n, dtype=np.int64)
if info.use_utc:
# fastpath
return vals.copy()

trans, deltas, typ = get_dst_info(tz)
converted = np.empty(n, dtype=np.int64)

if typ not in ["pytz", "dateutil"]:
# FixedOffset, we know len(deltas) == 1
delta = deltas[0]
pos_ = info.prepare(vals)
pos = <intp_t*>cnp.PyArray_DATA(pos_)

for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = val
else:
converted[i] = val + delta

else:
pos = trans.searchsorted(vals, side="right") - 1

for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = val
else:
if pos[i] < 0:
# TODO: How is this reached? Should we be checking for
# it elsewhere?
raise ValueError("First time before start of DST info")
for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = NPY_NAT
continue

converted[i] = val + deltas[pos[i]]
converted[i] = info.utc_val_to_local_val(val, pos, i)

return converted

Expand Down Expand Up @@ -566,3 +531,73 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True,
return val - delta
else:
return val + delta


@cython.freelist(16)
cdef class Localizer:
    """
    Per-timezone helper for converting UTC int64 values to local values.

    The constructor inspects ``tz`` once and records which conversion
    strategy applies, so that ``utc_val_to_local_val`` only has to check
    flags for each value.

    Attributes (declared in tzconversion.pxd)
    -----------------------------------------
    tz : tzinfo
        The timezone passed to the constructor.
    use_utc : bint
        Set when ``is_utc(tz)`` holds or ``tz`` is None.
    use_tzlocal : bint
        Set when ``is_tzlocal(tz)`` holds.
    use_fixed : bint
        Set when ``get_dst_info`` reports a ``typ`` other than "pytz" or
        "dateutil"; ``delta`` then holds ``deltas[0]``.
    use_dst : bint
        Set when ``typ`` is "pytz" or "dateutil".
    use_pytz : bint
        Set in addition to ``use_dst`` when ``typ`` is "pytz".
    trans, deltas, typ
        The three values returned by ``get_dst_info(tz)``; only assigned
        when ``tz`` is neither UTC/None nor tzlocal.
    delta : int64_t
        The single offset used in the fixed-offset case.
    """

    @cython.boundscheck(False)
    def __cinit__(self, tzinfo tz):
        self.tz = tz

        if is_utc(tz) or tz is None:
            self.use_utc = True
            return

        if is_tzlocal(tz):
            self.use_tzlocal = True
            return

        transitions, offsets, kind = get_dst_info(tz)
        self.trans = transitions
        self.deltas = offsets
        self.typ = kind

        if kind == "pytz" or kind == "dateutil":
            self.use_dst = True
            if kind == "pytz":
                self.use_pytz = True
        else:
            # static/fixed; in this case we know that len(deltas) == 1
            self.use_fixed = True
            self.delta = offsets[0]

    cdef intp_t prepare1(self, int64_t utc_val):
        """
        Return the index into ``deltas`` for a single UTC value.

        Outside the DST case the result is a dummy 0 that
        ``utc_val_to_local_val`` never reads.
        """
        if not self.use_dst:
            return 0  # won't be used
        return self.trans.searchsorted(utc_val, side="right") - 1

    cdef ndarray[intp_t] prepare(self, const int64_t[:] stamps):
        """
        Return the indices into ``deltas`` for each value in ``stamps``.

        Outside the DST case the module-level ``placeholder`` array is
        returned instead; it is never indexed.
        """
        if not self.use_dst:
            return placeholder  # won't be used
        return self.trans.searchsorted(stamps, side="right") - 1

    @cython.boundscheck(False)
    cdef inline int64_t utc_val_to_local_val(
        self, int64_t utc_val, intp_t* pos, Py_ssize_t i
    ):
        """
        Convert one UTC value to the corresponding local value.

        ``pos[i]`` is only dereferenced in the DST case, where it must be
        the index produced by ``prepare1``/``prepare`` for this value.
        """
        if self.use_utc:
            return utc_val
        if self.use_tzlocal:
            return _tz_convert_tzlocal_utc(utc_val, self.tz, to_utc=False)
        if self.use_fixed:
            return utc_val + self.delta
        return utc_val + self.deltas[pos[i]]


# Shared empty intp array returned by Localizer.prepare when the timezone is
# not handled via the DST (pytz/dateutil) path; in that case
# utc_val_to_local_val never reads a position, so the contents are irrelevant.
cdef ndarray placeholder = np.array([], dtype=np.intp)
Loading