Skip to content

Commit 2d41f2f

Browse files
committed
BUG: ignore errors for invalid dates in to_datetime with coerce (pandas-dev#25512)
parsing.try_parse_year_month_day(), called from _attempt_YYYYMMDD(), raises not only ValueError but also OverflowError for invalid dates, so OverflowError is now caught alongside ValueError.
1 parent 0a516c1 commit 2d41f2f

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

pandas/core/tools/datetimes.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -775,21 +775,21 @@ def calc_with_mask(carg, mask):
775775
# try intlike / strings that are ints
776776
try:
777777
return calc(arg.astype(np.int64))
778-
except ValueError:
778+
except (ValueError, OverflowError):
779779
pass
780780

781781
# a float with actual np.nan
782782
try:
783783
carg = arg.astype(np.float64)
784784
return calc_with_mask(carg, notna(carg))
785-
except ValueError:
785+
except (ValueError, OverflowError):
786786
pass
787787

788788
# string with NaN-like
789789
try:
790790
mask = ~algorithms.isin(arg, list(tslib.nat_strings))
791791
return calc_with_mask(arg, mask)
792-
except ValueError:
792+
except (ValueError, OverflowError):
793793
pass
794794

795795
return None

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,21 @@ def test_to_datetime_format_YYYYMMDD(self, cache):
8383
result = to_datetime(s, format='%Y%m%d', cache=cache)
8484
assert_series_equal(result, expected)
8585

86+
# GH 25512
87+
# strings with invalid date values, errors=coerce
88+
s = Series(['19801222', '20010012', '10019999', np.nan])
89+
result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
90+
cache=cache)
91+
expected = Series([Timestamp('19801222'), np.nan, np.nan, np.nan])
92+
tm.assert_series_equal(result, expected)
93+
94+
# integers with invalid date values, errors=coerce
95+
s = Series([20010012, 20190813, 20019999, np.nan])
96+
result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
97+
cache=cache)
98+
expected = Series([np.nan, Timestamp('20190813'), np.nan, np.nan])
99+
tm.assert_series_equal(result, expected)
100+
86101
# coercion
87102
# GH 7930
88103
s = Series([20121231, 20141231, 99991231])

0 commit comments

Comments
 (0)