Skip to content

Commit c6bbda5

Browse files
authored
REF factor out parse_pydatetime from array_to_datetime (#49866)
* factor out parse_pydatetime
* fix segfault
* 🚚 rename validate_tzout to convert_timezone
* change ensure_reso return value
* add except? -1 to ensure_reso

Co-authored-by: MarcoGorelli <>
1 parent 2236346 commit c6bbda5

File tree

3 files changed

+127
-30
lines changed

3 files changed

+127
-30
lines changed

pandas/_libs/tslib.pyx

Lines changed: 10 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ from pandas._libs.tslibs.conversion cimport (
5050
_TSObject,
5151
cast_from_unit,
5252
convert_datetime_to_tsobject,
53+
convert_timezone,
5354
get_datetime64_nanos,
55+
parse_pydatetime,
5456
precision_from_unit,
5557
)
5658
from pandas._libs.tslibs.nattype cimport (
@@ -59,7 +61,6 @@ from pandas._libs.tslibs.nattype cimport (
5961
c_nat_strings as nat_strings,
6062
)
6163
from pandas._libs.tslibs.timestamps cimport _Timestamp
62-
from pandas._libs.tslibs.timezones cimport tz_compare
6364

6465
from pandas._libs.tslibs import (
6566
Resolution,
@@ -525,35 +526,16 @@ cpdef array_to_datetime(
525526
seen_datetime = True
526527
if val.tzinfo is not None:
527528
found_tz = True
528-
if utc_convert:
529-
_ts = convert_datetime_to_tsobject(val, None)
530-
_ts.ensure_reso(NPY_FR_ns)
531-
iresult[i] = _ts.value
532-
elif found_naive:
533-
raise ValueError('Tz-aware datetime.datetime '
534-
'cannot be converted to '
535-
'datetime64 unless utc=True')
536-
elif tz_out is not None and not tz_compare(tz_out, val.tzinfo):
537-
raise ValueError('Tz-aware datetime.datetime '
538-
'cannot be converted to '
539-
'datetime64 unless utc=True')
540-
else:
541-
found_tz = True
542-
tz_out = val.tzinfo
543-
_ts = convert_datetime_to_tsobject(val, None)
544-
_ts.ensure_reso(NPY_FR_ns)
545-
iresult[i] = _ts.value
546-
547529
else:
548530
found_naive = True
549-
if found_tz and not utc_convert:
550-
raise ValueError('Cannot mix tz-aware with '
551-
'tz-naive values')
552-
if isinstance(val, _Timestamp):
553-
iresult[i] = val.as_unit("ns").value
554-
else:
555-
iresult[i] = pydatetime_to_dt64(val, &dts)
556-
check_dts_bounds(&dts)
531+
tz_out = convert_timezone(
532+
val.tzinfo,
533+
tz_out,
534+
found_naive,
535+
found_tz,
536+
utc_convert,
537+
)
538+
result[i] = parse_pydatetime(val, &dts, utc_convert)
557539

558540
elif PyDate_Check(val):
559541
seen_datetime = True

pandas/_libs/tslibs/conversion.pxd

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ from pandas._libs.tslibs.np_datetime cimport (
1212
NPY_DATETIMEUNIT,
1313
npy_datetimestruct,
1414
)
15+
from pandas._libs.tslibs.timestamps cimport _Timestamp
16+
from pandas._libs.tslibs.timezones cimport tz_compare
1517

1618

1719
cdef class _TSObject:
@@ -22,7 +24,7 @@ cdef class _TSObject:
2224
bint fold
2325
NPY_DATETIMEUNIT creso
2426

25-
cdef void ensure_reso(self, NPY_DATETIMEUNIT creso)
27+
cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1
2628

2729

2830
cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
@@ -40,3 +42,17 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1
4042
cpdef (int64_t, int) precision_from_unit(str unit)
4143

4244
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
45+
46+
cdef tzinfo convert_timezone(
47+
tzinfo tz_in,
48+
tzinfo tz_out,
49+
bint found_naive,
50+
bint found_tz,
51+
bint utc_convert,
52+
)
53+
54+
cdef int64_t parse_pydatetime(
55+
object val,
56+
npy_datetimestruct *dts,
57+
bint utc_convert,
58+
) except? -1

pandas/_libs/tslibs/conversion.pyx

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ from pandas._libs.tslibs.np_datetime cimport (
4141
npy_datetimestruct,
4242
npy_datetimestruct_to_datetime,
4343
pandas_datetime_to_datetimestruct,
44+
pydatetime_to_dt64,
4445
pydatetime_to_dtstruct,
4546
string_to_dts,
4647
)
@@ -65,6 +66,7 @@ from pandas._libs.tslibs.nattype cimport (
6566
c_NaT as NaT,
6667
c_nat_strings as nat_strings,
6768
)
69+
from pandas._libs.tslibs.timestamps cimport _Timestamp
6870
from pandas._libs.tslibs.tzconversion cimport (
6971
Localizer,
7072
tz_localize_to_utc_single,
@@ -208,9 +210,10 @@ cdef class _TSObject:
208210
self.fold = 0
209211
self.creso = NPY_FR_ns # default value
210212

211-
cdef void ensure_reso(self, NPY_DATETIMEUNIT creso):
213+
cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1:
212214
if self.creso != creso:
213215
self.value = convert_reso(self.value, self.creso, creso, False)
216+
return self.value
214217

215218

216219
cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
@@ -642,3 +645,99 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
642645
elif isinstance(dt, ABCTimestamp):
643646
return dt.tz_localize(tz)
644647
return _localize_pydatetime(dt, tz)
648+
649+
650+
cdef tzinfo convert_timezone(
    tzinfo tz_in,
    tzinfo tz_out,
    bint found_naive,
    bint found_tz,
    bint utc_convert,
):
    """
    Check that an element's timezone is compatible with the output timezone.

    Parameters
    ----------
    tz_in : tzinfo
        Timezone info of the element being processed (None if naive).
    tz_out : tzinfo
        Timezone info of the output determined so far.
    found_naive : bool
        Whether a timezone-naive element has been found so far.
    found_tz : bool
        Whether a timezone-aware element has been found so far.
    utc_convert : bool
        Whether to convert/localize to UTC.

    Returns
    -------
    tzinfo
        ``tz_out`` unchanged when ``tz_in`` is None or ``utc_convert`` is
        set; otherwise ``tz_in``.

    Raises
    ------
    ValueError
        If tz-aware and tz-naive elements are mixed, or if ``tz_in`` does not
        compare equal to an already-set ``tz_out``, and ``utc_convert`` is
        not set.
    """
    cdef:
        bint tz_mismatch

    if tz_in is None:
        # Naive element: only a problem if an aware one was already seen
        # and we are not normalizing everything to UTC.
        if found_tz and not utc_convert:
            raise ValueError('Cannot mix tz-aware with '
                             'tz-naive values')
        return tz_out

    if utc_convert:
        # Everything gets converted to UTC, so any timezone is acceptable.
        return tz_out

    # Aware element without UTC conversion: it must neither follow a naive
    # element nor disagree with the timezone already chosen for the output.
    tz_mismatch = found_naive or (
        tz_out is not None and not tz_compare(tz_out, tz_in)
    )
    if tz_mismatch:
        raise ValueError('Tz-aware datetime.datetime '
                         'cannot be converted to '
                         'datetime64 unless utc=True')
    return tz_in
701+
702+
703+
cdef int64_t parse_pydatetime(
    object val,
    npy_datetimestruct *dts,
    bint utc_convert,
) except? -1:
    """
    Convert pydatetime to datetime64.

    Parameters
    ----------
    val
        Element being processed.
    dts : *npy_datetimestruct
        Needed to use in pydatetime_to_dt64, which writes to it.
    utc_convert : bool
        Whether to convert/localize to UTC. Tz-aware inputs are converted
        the same way regardless of this flag, so it does not currently
        change the result; it is kept so the signature stays stable for
        callers.

    Returns
    -------
    int64_t
        Integer value of ``val``. Tz-aware inputs and ``_Timestamp`` inputs
        are explicitly brought to nanosecond resolution; for other naive
        datetimes the value is whatever ``pydatetime_to_dt64`` returns
        (presumably nanoseconds as well -- confirm against that helper).

    Raises
    ------
    OutOfBoundsDatetime
    """
    cdef:
        _TSObject _ts
        int64_t result

    if val.tzinfo is not None:
        # The utc_convert and non-utc_convert branches used to be exact
        # duplicates of each other, so a single path covers both.
        _ts = convert_datetime_to_tsobject(val, None)
        _ts.ensure_reso(NPY_FR_ns)
        return _ts.value

    if isinstance(val, _Timestamp):
        return val.as_unit("ns").value

    # Plain naive datetime: fill ``dts`` and validate the populated struct.
    result = pydatetime_to_dt64(val, dts)
    check_dts_bounds(dts)
    return result

0 commit comments

Comments
 (0)