Skip to content

Commit 5a5ed18

Browse files
committed
PERF: speed up tz-aware operations by making searchsorted call in bulk,
rather than piecewise
1 parent aefe756 commit 5a5ed18

File tree

1 file changed

+17
-11
lines changed

1 file changed

+17
-11
lines changed

pandas/_libs/tslibs/conversion.pyx

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -636,34 +636,41 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
636636
"""
637637
cdef:
638638
Py_ssize_t n = len(values)
639-
Py_ssize_t i, pos
639+
Py_ssize_t i
640+
int64_t[:] pos
640641
int64_t[:] result = np.empty(n, dtype=np.int64)
641642
ndarray[int64_t] trans
642643
int64_t[:] deltas
643644
int64_t v
645+
bint tz_is_local
644646

645-
if not is_tzlocal(tz):
647+
tz_is_local = is_tzlocal(tz)
648+
649+
if not tz_is_local:
646650
# get_dst_info cannot extract offsets from tzlocal because it's
647651
# dependent on a datetime
648652
trans, deltas, _ = get_dst_info(tz)
649653
if not to_utc:
650654
# We add `offset` below instead of subtracting it
651655
deltas = -1 * np.array(deltas, dtype='i8')
652656

657+
if not tz_is_local:
658+
pos = trans.searchsorted(values, side='right') - 1
659+
653660
for i in range(n):
654661
v = values[i]
655662
if v == NPY_NAT:
656663
result[i] = v
657-
elif is_tzlocal(tz):
664+
elif tz_is_local:
658665
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=to_utc)
659666
else:
660667
# TODO: Is it more efficient to call searchsorted pointwise or
661668
# on `values` outside the loop? We are not consistent about this.
662669
# relative efficiency of pointwise increases with number of iNaTs
663-
pos = trans.searchsorted(v, side='right') - 1
664-
if pos < 0:
670+
671+
if pos[i] < 0:
665672
raise ValueError('First time before start of DST info')
666-
result[i] = v - deltas[pos]
673+
result[i] = v - deltas[pos[i]]
667674

668675
return result
669676

@@ -1252,9 +1259,9 @@ def is_date_array_normalized(int64_t[:] stamps, object tz=None):
12521259
is_normalized : bool True if all stamps are normalized
12531260
"""
12541261
cdef:
1255-
Py_ssize_t pos, i, n = len(stamps)
1262+
Py_ssize_t i, n = len(stamps)
12561263
ndarray[int64_t] trans
1257-
int64_t[:] deltas
1264+
int64_t[:] deltas, pos
12581265
npy_datetimestruct dts
12591266
int64_t local_val, delta
12601267
str typ
@@ -1283,11 +1290,10 @@ def is_date_array_normalized(int64_t[:] stamps, object tz=None):
12831290
return False
12841291

12851292
else:
1293+
pos = trans.searchsorted(stamps) - 1
12861294
for i in range(n):
12871295
# Adjust datetime64 timestamp, recompute datetimestruct
1288-
pos = trans.searchsorted(stamps[i]) - 1
1289-
1290-
dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
1296+
dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
12911297
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
12921298
return False
12931299

0 commit comments

Comments
 (0)