diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index a0a7ef3501d7f..5165eb8607219 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -2223,14 +2223,14 @@ def objects_to_datetime64ns(
             allow_mixed=allow_mixed,
         )
         result = result.reshape(data.shape, order=order)
-    except ValueError as err:
+    except ValueError:
         try:
             values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
             # If tzaware, these values represent unix timestamps, so we
             # return them as i8 to distinguish from wall times
             values = values.reshape(data.shape, order=order)
             return values.view("i8"), tz_parsed
-        except (ValueError, TypeError):
+        except (ValueError, TypeError) as err:
             raise err
 
     if tz_parsed is not None:
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 67a6975c21fdd..2697c91032af6 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -164,6 +164,7 @@ def _maybe_cache(
     format: str | None,
     cache: bool,
     convert_listlike: Callable,
+    errors: str = "raise",
 ) -> Series:
     """
     Create a cache of unique dates from an array of dates
@@ -177,6 +178,10 @@ def _maybe_cache(
         True attempts to create a cache of converted values
     convert_listlike : function
         Conversion function to apply on dates
+    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
+        - If 'raise', then invalid parsing will raise an exception.
+        - If 'coerce', then invalid parsing will be set as NaT.
+        - If 'ignore', then invalid parsing will return the input.
 
     Returns
     -------
@@ -195,7 +200,16 @@ def _maybe_cache(
         unique_dates = unique(arg)
         if len(unique_dates) < len(arg):
             cache_dates = convert_listlike(unique_dates, format)
-            cache_array = Series(cache_dates, index=unique_dates)
+            try:
+                cache_array = Series(cache_dates, index=unique_dates)
+            except OutOfBoundsDatetime:
+                # caching attempts to create a DatetimeIndex, which may raise
+                # an OOB. If that's the desired behavior, then just reraise...
+                if errors == "raise":
+                    raise
+                # ... otherwise, continue without the cache.
+                return cache_array
+
             # GH#39882 and GH#35888 in case of None and NaT we get duplicates
             if not cache_array.index.is_unique:
                 cache_array = cache_array[~cache_array.index.duplicated()]
@@ -893,32 +907,25 @@ def to_datetime(
             else:
                 result = arg.tz_localize(tz)
     elif isinstance(arg, ABCSeries):
-        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
+        cache_array = _maybe_cache(arg, format, cache, convert_listlike, errors)
+
         if not cache_array.empty:
             result = arg.map(cache_array)
         else:
-            values = convert_listlike(arg._values, format)
+            values = convert_listlike(arg._values, format, errors)
             result = arg._constructor(values, index=arg.index, name=arg.name)
     elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
         result = _assemble_from_unit_mappings(arg, errors, tz)
     elif isinstance(arg, Index):
-        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
+        cache_array = _maybe_cache(arg, format, cache, convert_listlike, errors)
+
         if not cache_array.empty:
             result = _convert_and_box_cache(arg, cache_array, name=arg.name)
         else:
             result = convert_listlike(arg, format, name=arg.name)
     elif is_list_like(arg):
-        try:
-            cache_array = _maybe_cache(arg, format, cache, convert_listlike)
-        except OutOfBoundsDatetime:
-            # caching attempts to create a DatetimeIndex, which may raise
-            # an OOB. If that's the desired behavior, then just reraise...
-            if errors == "raise":
-                raise
-            # ... otherwise, continue without the cache.
-            from pandas import Series
+        cache_array = _maybe_cache(arg, format, cache, convert_listlike, errors)
 
-            cache_array = Series([], dtype=object)  # just an empty array
         if not cache_array.empty:
             result = _convert_and_box_cache(arg, cache_array)
         else:
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 4867ba58838ef..3d6a37adb6823 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -7,6 +7,7 @@
     timedelta,
 )
 from decimal import Decimal
+from functools import partial
 import locale
 
 from dateutil.parser import parse
@@ -992,6 +993,30 @@ def test_convert_object_to_datetime_with_cache(
         )
         tm.assert_series_equal(result_series, expected_series)
 
+    @pytest.mark.parametrize(
+        "constructor",
+        (
+            partial(Series, dtype="object"),
+            list,
+            np.array,
+        ),
+    )
+    @pytest.mark.parametrize(
+        "datetimelikes,expected_values",
+        (((datetime(3000, 1, 1, 0, 0, 0, 0, pytz.UTC),), (NaT,)),),
+    )
+    def test_convert_object_to_datetime_nat_utc(
+        self, cache, datetimelikes, expected_values, constructor
+    ):
+        # GH#43732
+        res = constructor(datetimelikes * (start_caching_at + 1))
+        result_series = Series(to_datetime(res, errors="coerce", cache=cache, utc=True))
+        expected_series = Series(
+            (expected_values * (start_caching_at + 1)),
+            dtype="datetime64[ns, UTC]",
+        )
+        tm.assert_series_equal(result_series, expected_series)
+
     @pytest.mark.parametrize(
         "date, format",
         [
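
Reproduction sketch (not part of the patch): a minimal illustration of the GH#43732 scenario the new test covers, assuming pandas with this change applied and a caching threshold of about 50 repeated values (start_caching_at, as used in the test). An out-of-bounds, tz-aware datetime repeated often enough to trigger the unique-value cache should coerce to NaT under errors="coerce" instead of propagating OutOfBoundsDatetime from the cache path.

    from datetime import datetime

    import pytz

    import pandas as pd

    # 51 identical out-of-range datetimes: enough repeats for to_datetime to
    # attempt its unique-value cache, and beyond the datetime64[ns] bounds.
    ser = pd.Series([datetime(3000, 1, 1, tzinfo=pytz.UTC)] * 51, dtype="object")

    # With the patched _maybe_cache, the OutOfBoundsDatetime raised while
    # building the cache index is swallowed for errors="coerce", so the
    # values coerce to NaT rather than raising.
    result = pd.to_datetime(ser, errors="coerce", utc=True, cache=True)
    assert result.isna().all()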