Skip to content

Commit 716d32b

Browse files
author
MarcoGorelli
committed
empty strings -> nat
1 parent 113bdb3 commit 716d32b

File tree

3 files changed

+10
-20
lines changed

3 files changed

+10
-20
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,7 @@ Datetimelike
776776
- Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
777777
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`)
778778
- Bug in :class:`Timestamp` was showing ``UserWarning`` which was not actionable by users (:issue:`50232`)
779+
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing an empty string and a non-ISO8601 format was passed. Now, empty strings will be parsed as :class:`NaT`, for compatibility with how this is done for ISO8601 formats (:issue:`50251`)
779780
-
780781

781782
Timedelta

pandas/_libs/tslibs/strptime.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def array_strptime(
153153
for i in range(n):
154154
val = values[i]
155155
if isinstance(val, str):
156-
if val in nat_strings:
156+
if len(val) == 0 or val in nat_strings:
157157
iresult[i] = NPY_NAT
158158
continue
159159
elif checknull_with_nat_and_na(val):

pandas/tests/tools/test_to_datetime.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,17 +2019,13 @@ def test_to_datetime_timezone_name(self):
20192019
assert result == expected
20202020

20212021
@td.skip_if_not_us_locale
2022-
def test_to_datetime_with_apply_with_empty_str(self, cache):
2022+
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
2023+
def test_to_datetime_with_apply_with_empty_str(self, cache, errors):
20232024
# this is only locale tested with US/None locales
2024-
# GH 5195
2025+
# GH 5195, GH50251
20252026
# with a format and coerce a single item to_datetime fails
20262027
td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3])
2027-
msg = r"time data '' does not match format '%b %y' \(match\)"
2028-
with pytest.raises(ValueError, match=msg):
2029-
to_datetime(td, format="%b %y", errors="raise", cache=cache)
2030-
with pytest.raises(ValueError, match=msg):
2031-
td.apply(to_datetime, format="%b %y", errors="raise", cache=cache)
2032-
expected = to_datetime(td, format="%b %y", errors="coerce", cache=cache)
2028+
expected = to_datetime(td, format="%b %y", errors=errors, cache=cache)
20332029

20342030
result = td.apply(
20352031
lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache)
@@ -2976,24 +2972,17 @@ def test_na_to_datetime(nulls_fixture, klass):
29762972
assert result[0] is NaT
29772973

29782974

2979-
def test_empty_string_datetime_coerce_format():
2980-
# GH13044
2975+
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
2976+
def test_empty_string_datetime_coerce_format(errors):
2977+
# GH13044, GH50251
29812978
td = Series(["03/24/2016", "03/25/2016", ""])
29822979
format = "%m/%d/%Y"
29832980

29842981
# coerce empty string to pd.NaT
2985-
result = to_datetime(td, format=format, errors="coerce")
2982+
result = to_datetime(td, format=format, errors=errors)
29862983
expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]")
29872984
tm.assert_series_equal(expected, result)
29882985

2989-
# raise an exception in case a format is given
2990-
with pytest.raises(ValueError, match="does not match format"):
2991-
to_datetime(td, format=format, errors="raise")
2992-
2993-
# still raise an exception in case no format is given
2994-
with pytest.raises(ValueError, match="does not match format"):
2995-
to_datetime(td, errors="raise")
2996-
29972986

29982987
def test_empty_string_datetime_coerce__unit():
29992988
# GH13044

0 commit comments

Comments
 (0)