Skip to content

Commit 381dd41

Browse files
thomasqueirozb, andrekwr, HenryRocha
committed
BUG: pandas.to_datetime raises exception when more than 50 values need coercion to NaT (#43732)
Raise proper error from objects_to_datetime64ns. Check for OutOfBoundsDatetime in _maybe_cache. Co-authored-by: André Elimelek de Weber (andrekwr) <[email protected]>; Co-authored-by: Henry Rocha (HenryRocha) <[email protected]>
1 parent 6b75ed6 commit 381dd41

File tree

2 files changed

+23
-16
lines changed

2 files changed

+23
-16
lines changed

pandas/core/arrays/datetimes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2188,14 +2188,14 @@ def objects_to_datetime64ns(
21882188
allow_mixed=allow_mixed,
21892189
)
21902190
result = result.reshape(data.shape, order=order)
2191-
except ValueError as err:
2191+
except ValueError:
21922192
try:
21932193
values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
21942194
# If tzaware, these values represent unix timestamps, so we
21952195
# return them as i8 to distinguish from wall times
21962196
values = values.reshape(data.shape, order=order)
21972197
return values.view("i8"), tz_parsed
2198-
except (ValueError, TypeError):
2198+
except (ValueError, TypeError) as err:
21992199
raise err
22002200

22012201
if tz_parsed is not None:

pandas/core/tools/datetimes.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ def _maybe_cache(
162162
format: str | None,
163163
cache: bool,
164164
convert_listlike: Callable,
165+
errors: str = "raise",
165166
) -> Series:
166167
"""
167168
Create a cache of unique dates from an array of dates
@@ -175,6 +176,10 @@ def _maybe_cache(
175176
True attempts to create a cache of converted values
176177
convert_listlike : function
177178
Conversion function to apply on dates
179+
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
180+
- If 'raise', then invalid parsing will raise an exception.
181+
- If 'coerce', then invalid parsing will be set as NaT.
182+
- If 'ignore', then invalid parsing will return the input.
178183
179184
Returns
180185
-------
@@ -193,7 +198,16 @@ def _maybe_cache(
193198
unique_dates = unique(arg)
194199
if len(unique_dates) < len(arg):
195200
cache_dates = convert_listlike(unique_dates, format)
196-
cache_array = Series(cache_dates, index=unique_dates)
201+
try:
202+
cache_array = Series(cache_dates, index=unique_dates)
203+
except OutOfBoundsDatetime:
204+
# caching attempts to create a DatetimeIndex, which may raise
205+
# an OOB. If that's the desired behavior, then just reraise...
206+
if errors == "raise":
207+
raise
208+
# ... otherwise, continue without the cache.
209+
return cache_array
210+
197211
# GH#39882 and GH#35888 in case of None and NaT we get duplicates
198212
if not cache_array.index.is_unique:
199213
cache_array = cache_array[~cache_array.index.duplicated()]
@@ -891,32 +905,25 @@ def to_datetime(
891905
# error: Too many arguments for "tz_localize" of "NaTType"
892906
result = result.tz_localize(tz) # type: ignore[call-arg]
893907
elif isinstance(arg, ABCSeries):
894-
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
908+
cache_array = _maybe_cache(arg, format, cache, convert_listlike, errors)
909+
895910
if not cache_array.empty:
896911
result = arg.map(cache_array)
897912
else:
898-
values = convert_listlike(arg._values, format)
913+
values = convert_listlike(arg._values, format, errors)
899914
result = arg._constructor(values, index=arg.index, name=arg.name)
900915
elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
901916
result = _assemble_from_unit_mappings(arg, errors, tz)
902917
elif isinstance(arg, Index):
903-
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
918+
cache_array = _maybe_cache(arg, format, cache, convert_listlike, errors)
919+
904920
if not cache_array.empty:
905921
result = _convert_and_box_cache(arg, cache_array, name=arg.name)
906922
else:
907923
result = convert_listlike(arg, format, name=arg.name)
908924
elif is_list_like(arg):
909-
try:
910-
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
911-
except OutOfBoundsDatetime:
912-
# caching attempts to create a DatetimeIndex, which may raise
913-
# an OOB. If that's the desired behavior, then just reraise...
914-
if errors == "raise":
915-
raise
916-
# ... otherwise, continue without the cache.
917-
from pandas import Series
925+
cache_array = _maybe_cache(arg, format, cache, convert_listlike, errors)
918926

919-
cache_array = Series([], dtype=object) # just an empty array
920927
if not cache_array.empty:
921928
result = _convert_and_box_cache(arg, cache_array)
922929
else:

0 commit comments

Comments
 (0)