From 838157409809f3a331585908b727ba10758d0ab5 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 14:08:15 +0000 Subject: [PATCH 01/26] added warnings when parse inconsistent with dayfirst arg --- pandas/_libs/tslibs/parsing.pyx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 212e40b30848a..ef438b632e027 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -3,6 +3,7 @@ Parsing functions for datetime and datetime-like strings. """ import re import time +import warnings from libc.string cimport strchr @@ -168,14 +169,28 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): # date_string can't be converted to date, above format return None, None + swapped_day_and_month = False if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ and (month <= MAX_MONTH or day <= MAX_MONTH): if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: day, month = month, day + swapped_day_and_month = True if PY_VERSION_HEX >= 0x03060100: # In Python <= 3.6.0 there is no range checking for invalid dates # in C api, thus we call faster C version for 3.6.1 or newer + + if dayfirst and not swapped_day_and_month: + warnings.warn(f"Parsing {date_string} MM/DD format.") + elif not dayfirst and swapped_day_and_month: + warnings.warn(f"Parsing {date_string} DD/MM format.") + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + + if dayfirst and not swapped_day_and_month: + warnings.warn(f"Parsing {date_string} MM/DD format.") + elif not dayfirst and swapped_day_and_month: + warnings.warn(f"Parsing {date_string} DD/MM format.") + return datetime(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError(f"Invalid date specified ({month}/{day})") From 12a36d8b57084137fd4b42157ff88716f67098d5 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 17:06:28 +0000 Subject: [PATCH 02/26] improved 
error message --- pandas/_libs/tslibs/parsing.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ef438b632e027..3dbc85b7df9de 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -180,16 +180,16 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): # in C api, thus we call faster C version for 3.6.1 or newer if dayfirst and not swapped_day_and_month: - warnings.warn(f"Parsing {date_string} MM/DD format.") + warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: - warnings.warn(f"Parsing {date_string} DD/MM format.") + warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") return datetime_new(year, month, day, 0, 0, 0, 0, None), reso if dayfirst and not swapped_day_and_month: - warnings.warn(f"Parsing {date_string} MM/DD format.") + warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: - warnings.warn(f"Parsing {date_string} DD/MM format.") + warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") return datetime(year, month, day, 0, 0, 0, 0, None), reso From 0ee3428ac8111603e7cc252550597aff9488bbb6 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 17:08:40 +0000 Subject: [PATCH 03/26] TST: added tests --- pandas/tests/tools/test_to_datetime.py | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7351f50aea8c1..aea49a3b5bf2b 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1839,6 +1839,60 @@ def test_dayfirst(self, cache): tm.assert_index_equal(expected, idx5) tm.assert_index_equal(expected, idx6) + def test_dayfirst_warnings(self): + # GH 12585 + + # CASE 1: valid input + arr = ["31/12/2014", "10/03/2011"] + expected 
= DatetimeIndex( + ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None + ) + + # A. dayfirst arg correct, no warning + res1 = to_datetime(arr, dayfirst=True) + tm.assert_index_equal(expected, res1) + + # B. dayfirst arg incorrect, warning + incorrect output + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + res2 = to_datetime(arr, dayfirst=False) + with pytest.raises(AssertionError): + tm.assert_index_equal(expected, res2) + + # C. dayfirst default arg, same as B + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + res3 = to_datetime(arr, dayfirst=False) + with pytest.raises(AssertionError): + tm.assert_index_equal(expected, res3) + + # D. infer_datetime_format=True overrides dayfirst default + # no warning + correct result + res4 = to_datetime(arr, infer_datetime_format=True) + tm.assert_index_equal(expected, res4) + + # CASE 2: invalid input + # cannot consistently process with single format + # warnings *always* raised + + arr = ["31/12/2014", "03/30/2011"] + + msg = r"Parsing '03/30/2011' in MM/DD/YYYY format." + with pytest.warns(UserWarning, match=msg): + to_datetime(arr, dayfirst=True) + + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + to_datetime(arr, dayfirst=False) + + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + to_datetime(arr) + + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." 
+ with pytest.warns(UserWarning, match=msg): + to_datetime(arr, infer_datetime_format=True) + @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray]) def test_to_datetime_dta_tz(self, klass): # GH#27733 From 9f1f7c9e5a02d18189986a396baf5877827e0cc5 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 18:04:49 +0000 Subject: [PATCH 04/26] removed trailing whitespaces --- pandas/_libs/tslibs/parsing.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 3dbc85b7df9de..53d23575aa69c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -178,19 +178,19 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if PY_VERSION_HEX >= 0x03060100: # In Python <= 3.6.0 there is no range checking for invalid dates # in C api, thus we call faster C version for 3.6.1 or newer - + if dayfirst and not swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso - + if dayfirst and not swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - + return datetime(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError(f"Invalid date specified ({month}/{day})") From 67e9d95fd27f6e0092911226a757f73a8d284712 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 18:05:35 +0000 Subject: [PATCH 05/26] removed pytest.warns --- pandas/tests/tools/test_to_datetime.py | 36 +++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index aea49a3b5bf2b..a69706bb8700b 100644 
--- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1853,16 +1853,12 @@ def test_dayfirst_warnings(self): tm.assert_index_equal(expected, res1) # B. dayfirst arg incorrect, warning + incorrect output - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - res2 = to_datetime(arr, dayfirst=False) + res2 = to_datetime(arr, dayfirst=False) with pytest.raises(AssertionError): tm.assert_index_equal(expected, res2) # C. dayfirst default arg, same as B - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - res3 = to_datetime(arr, dayfirst=False) + res3 = to_datetime(arr, dayfirst=False) with pytest.raises(AssertionError): tm.assert_index_equal(expected, res3) @@ -1876,22 +1872,26 @@ def test_dayfirst_warnings(self): # warnings *always* raised arr = ["31/12/2014", "03/30/2011"] + # first in DD/MM/YYYY, second in MM/DD/YYYY + expected = DatetimeIndex( + ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None + ) - msg = r"Parsing '03/30/2011' in MM/DD/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr, dayfirst=True) + # A. use dayfirst=True + res5 = to_datetime(arr, dayfirst=True) + tm.assert_index_equal(expected, res5) - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr, dayfirst=False) + # B. use dayfirst=False + res6 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected, res6) - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr) + # C. use dayfirst default arg, same as B + res7 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected, res7) - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr, infer_datetime_format=True) + # D. 
use infer_datetime_format=True + res8 = to_datetime(arr, infer_datetime_format=True) + tm.assert_index_equal(expected, res8) @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray]) def test_to_datetime_dta_tz(self, klass): From 390969ffa88a59a7ff02b1c202d3d69d1d8ce7ef Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 5 Aug 2021 19:36:27 +0100 Subject: [PATCH 06/26] wip --- pandas/_libs/tslib.pyx | 11 +++++++++-- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/parsing.pyi | 2 +- pandas/_libs/tslibs/parsing.pyx | 28 ++++++++++++++-------------- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6b1c0f851f8e7..2f9468966e4e0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -531,7 +531,7 @@ cpdef array_to_datetime( return values, tz_out try: - py_dt = parse_datetime_string(val, + py_dt, swapped_day_and_month = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) # If the dateutil parser returned tzinfo, capture it @@ -727,6 +727,7 @@ cdef _array_to_datetime_object( # We return an object array and only attempt to parse: # 1) NaT or NaT-like values # 2) datetime strings, which we return as datetime.datetime + swapped_day_and_month = False for i in range(n): val = values[i] if checknull_with_nat_and_na(val) or PyDateTime_Check(val): @@ -737,7 +738,7 @@ cdef _array_to_datetime_object( oresult[i] = 'NaT' continue try: - oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, + oresult[i], swapped_day_and_month = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) pydatetime_to_dt64(oresult[i], &dts) check_dts_bounds(&dts) @@ -752,6 +753,12 @@ cdef _array_to_datetime_object( if is_raise: raise return values, None + + if dayfirst and not swapped_day_and_month: + warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") + elif not dayfirst and swapped_day_and_month: + warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY 
format.") + return oresult, None diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 865185f9acea7..82f0fe7ef604f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -645,7 +645,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, if string_to_dts_failed or do_parse_datetime_string: try: - ts = parse_datetime_string(ts, dayfirst=dayfirst, + ts, swapped_day_and_month = parse_datetime_string(ts, dayfirst=dayfirst, yearfirst=yearfirst) except (ValueError, OverflowError): raise ValueError("could not convert string to Timestamp") diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi index fc08a48cee343..d0e02912d481e 100644 --- a/pandas/_libs/tslibs/parsing.pyi +++ b/pandas/_libs/tslibs/parsing.pyi @@ -11,7 +11,7 @@ def parse_datetime_string( dayfirst: bool = ..., yearfirst: bool = ..., **kwargs, -) -> datetime: ... +) -> tuple[datetime, bool]: ... def parse_time_string( arg: str, freq: BaseOffset | str | None = ..., diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 53d23575aa69c..3efdc1a76c832 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -146,7 +146,7 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if length == 10: # parsing MM?DD?YYYY and DD?MM?YYYY dates if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): - return None, None + return None, None, None month = _parse_2digit(buf) day = _parse_2digit(buf + 3) year = _parse_4digit(buf + 6) @@ -157,17 +157,17 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if buf[2] == b'.' or _is_not_delimiter(buf[2]): # we cannot reliably tell whether e.g. 
10.2010 is a float # or a date, thus we refuse to parse it here - return None, None + return None, None, None month = _parse_2digit(buf) year = _parse_4digit(buf + 3) reso = 'month' else: - return None, None + return None, None, None if month < 0 or day < 0 or year < 1000: # some part is not an integer, so # date_string can't be converted to date, above format - return None, None + return None, None, None swapped_day_and_month = False if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ @@ -184,14 +184,14 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso, swapped_day_and_month if dayfirst and not swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - return datetime(year, month, day, 0, 0, 0, 0, None), reso + return datetime(year, month, day, 0, 0, 0, 0, None), reso, swapped_day_and_month raise DateParseError(f"Invalid date specified ({month}/{day})") @@ -234,7 +234,7 @@ def parse_datetime_string( bint dayfirst=False, bint yearfirst=False, **kwargs, -) -> datetime: +) -> tuple[datetime, bool]: """ Parse datetime string, only returns datetime. Also cares special handling matching time patterns. 
@@ -254,15 +254,15 @@ def parse_datetime_string( # use current datetime as default, not pass _DEFAULT_DATETIME dt = du_parse(date_string, dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) - return dt + return dt, False - dt, _ = _parse_delimited_date(date_string, dayfirst) + dt, _, swapped_day_and_month = _parse_delimited_date(date_string, dayfirst) if dt is not None: - return dt + return dt, swapped_day_and_month try: dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq=None) - return dt + return dt, False except DateParseError: raise except ValueError: @@ -276,7 +276,7 @@ def parse_datetime_string( # TypeError: 'NoneType' object is not iterable raise ValueError('Given date string not likely a datetime.') - return dt + return dt, False def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): @@ -309,10 +309,10 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): if yearfirst is None: yearfirst = get_option("display.date_yearfirst") - res = parse_datetime_string_with_reso(arg, freq=freq, + res, swapped_day_and_month = parse_datetime_string_with_reso(arg, freq=freq, dayfirst=dayfirst, yearfirst=yearfirst) - return res + return res, swapped_day_and_month cdef parse_datetime_string_with_reso( From 9ee56ac79ef241a880b29cccc3b7d52713466592 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 5 Aug 2021 19:37:57 +0100 Subject: [PATCH 07/26] revert --- pandas/_libs/tslib.pyx | 11 ++--------- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/parsing.pyi | 2 +- pandas/_libs/tslibs/parsing.pyx | 28 ++++++++++++++-------------- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2f9468966e4e0..6b1c0f851f8e7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -531,7 +531,7 @@ cpdef array_to_datetime( return values, tz_out try: - py_dt, swapped_day_and_month = parse_datetime_string(val, + py_dt = parse_datetime_string(val, 
dayfirst=dayfirst, yearfirst=yearfirst) # If the dateutil parser returned tzinfo, capture it @@ -727,7 +727,6 @@ cdef _array_to_datetime_object( # We return an object array and only attempt to parse: # 1) NaT or NaT-like values # 2) datetime strings, which we return as datetime.datetime - swapped_day_and_month = False for i in range(n): val = values[i] if checknull_with_nat_and_na(val) or PyDateTime_Check(val): @@ -738,7 +737,7 @@ cdef _array_to_datetime_object( oresult[i] = 'NaT' continue try: - oresult[i], swapped_day_and_month = parse_datetime_string(val, dayfirst=dayfirst, + oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) pydatetime_to_dt64(oresult[i], &dts) check_dts_bounds(&dts) @@ -753,12 +752,6 @@ cdef _array_to_datetime_object( if is_raise: raise return values, None - - if dayfirst and not swapped_day_and_month: - warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") - elif not dayfirst and swapped_day_and_month: - warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - return oresult, None diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 82f0fe7ef604f..865185f9acea7 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -645,7 +645,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, if string_to_dts_failed or do_parse_datetime_string: try: - ts, swapped_day_and_month = parse_datetime_string(ts, dayfirst=dayfirst, + ts = parse_datetime_string(ts, dayfirst=dayfirst, yearfirst=yearfirst) except (ValueError, OverflowError): raise ValueError("could not convert string to Timestamp") diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi index d0e02912d481e..fc08a48cee343 100644 --- a/pandas/_libs/tslibs/parsing.pyi +++ b/pandas/_libs/tslibs/parsing.pyi @@ -11,7 +11,7 @@ def parse_datetime_string( dayfirst: bool = ..., yearfirst: bool = ..., **kwargs, -) -> tuple[datetime, bool]: ... 
+) -> datetime: ... def parse_time_string( arg: str, freq: BaseOffset | str | None = ..., diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 3efdc1a76c832..53d23575aa69c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -146,7 +146,7 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if length == 10: # parsing MM?DD?YYYY and DD?MM?YYYY dates if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): - return None, None, None + return None, None month = _parse_2digit(buf) day = _parse_2digit(buf + 3) year = _parse_4digit(buf + 6) @@ -157,17 +157,17 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if buf[2] == b'.' or _is_not_delimiter(buf[2]): # we cannot reliably tell whether e.g. 10.2010 is a float # or a date, thus we refuse to parse it here - return None, None, None + return None, None month = _parse_2digit(buf) year = _parse_4digit(buf + 3) reso = 'month' else: - return None, None, None + return None, None if month < 0 or day < 0 or year < 1000: # some part is not an integer, so # date_string can't be converted to date, above format - return None, None, None + return None, None swapped_day_and_month = False if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ @@ -184,14 +184,14 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - return datetime_new(year, month, day, 0, 0, 0, 0, None), reso, swapped_day_and_month + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso if dayfirst and not swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - return datetime(year, month, day, 0, 0, 0, 0, None), reso, swapped_day_and_month + return 
datetime(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError(f"Invalid date specified ({month}/{day})") @@ -234,7 +234,7 @@ def parse_datetime_string( bint dayfirst=False, bint yearfirst=False, **kwargs, -) -> tuple[datetime, bool]: +) -> datetime: """ Parse datetime string, only returns datetime. Also cares special handling matching time patterns. @@ -254,15 +254,15 @@ def parse_datetime_string( # use current datetime as default, not pass _DEFAULT_DATETIME dt = du_parse(date_string, dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) - return dt, False + return dt - dt, _, swapped_day_and_month = _parse_delimited_date(date_string, dayfirst) + dt, _ = _parse_delimited_date(date_string, dayfirst) if dt is not None: - return dt, swapped_day_and_month + return dt try: dt, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq=None) - return dt, False + return dt except DateParseError: raise except ValueError: @@ -276,7 +276,7 @@ def parse_datetime_string( # TypeError: 'NoneType' object is not iterable raise ValueError('Given date string not likely a datetime.') - return dt, False + return dt def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): @@ -309,10 +309,10 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): if yearfirst is None: yearfirst = get_option("display.date_yearfirst") - res, swapped_day_and_month = parse_datetime_string_with_reso(arg, freq=freq, + res = parse_datetime_string_with_reso(arg, freq=freq, dayfirst=dayfirst, yearfirst=yearfirst) - return res, swapped_day_and_month + return res cdef parse_datetime_string_with_reso( From 0744cedca0cc44190e0fe56c8cfd552509b3ee8e Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 5 Aug 2021 19:54:29 +0100 Subject: [PATCH 08/26] set stacklevel, assert warning messages --- pandas/_libs/tslibs/parsing.pyx | 24 +++++++++++++++--- pandas/tests/tools/test_to_datetime.py | 34 ++++++++++++++++++++------ 2 files changed, 46 insertions(+), 12 
deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 53d23575aa69c..f10e29286b6e3 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -180,16 +180,32 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): # in C api, thus we call faster C version for 3.6.1 or newer if dayfirst and not swapped_day_and_month: - warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") + warnings.warn( + f"Parsing '{date_string}' in MM/DD/YYYY format. Provide format " + "or specify infer_datetime_format=True for consistent parsing.", + stacklevel=4, + ) elif not dayfirst and swapped_day_and_month: - warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") + warnings.warn( + f"Parsing '{date_string}' in DD/MM/YYYY format. Provide format " + "or specify infer_datetime_format=True for consistent parsing.", + stacklevel=4, + ) return datetime_new(year, month, day, 0, 0, 0, 0, None), reso if dayfirst and not swapped_day_and_month: - warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") + warnings.warn( + f"Parsing '{date_string}' in MM/DD/YYYY format. Provide format or " + "specify infer_datetime_format=True for consistent parsing.", + stacklevel=4, + ) elif not dayfirst and swapped_day_and_month: - warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") + warnings.warn( + f"Parsing '{date_string}' in DD/MM/YYYY format. 
Provide format or " + "specify infer_datetime_format=True for consistent parsing.", + stacklevel=4, + ) return datetime(year, month, day, 0, 0, 0, 0, None), reso diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a69706bb8700b..ac60237a99687 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1841,6 +1841,14 @@ def test_dayfirst(self, cache): def test_dayfirst_warnings(self): # GH 12585 + warning_msg_day_first = ( + "Parsing '31/12/2014' in DD/MM/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + warning_msg_month_first = ( + "Parsing '03/30/2011' in MM/DD/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) # CASE 1: valid input arr = ["31/12/2014", "10/03/2011"] @@ -1853,13 +1861,19 @@ def test_dayfirst_warnings(self): tm.assert_index_equal(expected, res1) # B. dayfirst arg incorrect, warning + incorrect output - res2 = to_datetime(arr, dayfirst=False) - with pytest.raises(AssertionError): + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res2 = to_datetime(arr, dayfirst=False) + with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( + UserWarning, match=warning_msg_day_first + ): tm.assert_index_equal(expected, res2) # C. dayfirst default arg, same as B - res3 = to_datetime(arr, dayfirst=False) - with pytest.raises(AssertionError): + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res3 = to_datetime(arr, dayfirst=False) + with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( + UserWarning, match=warning_msg_day_first + ): tm.assert_index_equal(expected, res3) # D. infer_datetime_format=True overrides dayfirst default @@ -1878,19 +1892,23 @@ def test_dayfirst_warnings(self): ) # A. 
use dayfirst=True - res5 = to_datetime(arr, dayfirst=True) + with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first): + res5 = to_datetime(arr, dayfirst=True) tm.assert_index_equal(expected, res5) # B. use dayfirst=False - res6 = to_datetime(arr, dayfirst=False) + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res6 = to_datetime(arr, dayfirst=False) tm.assert_index_equal(expected, res6) # C. use dayfirst default arg, same as B - res7 = to_datetime(arr, dayfirst=False) + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res7 = to_datetime(arr, dayfirst=False) tm.assert_index_equal(expected, res7) # D. use infer_datetime_format=True - res8 = to_datetime(arr, infer_datetime_format=True) + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res8 = to_datetime(arr, infer_datetime_format=True) tm.assert_index_equal(expected, res8) @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray]) From 56867d4fbfd19314ca96309ea30f66b54f9d83f3 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 6 Aug 2021 11:48:59 +0100 Subject: [PATCH 09/26] okwarning in user guide --- doc/source/user_guide/timeseries.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 6f005f912fe37..a26ae72bf31ca 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -204,6 +204,7 @@ If you use dates which start with the day first (i.e. European style), you can pass the ``dayfirst`` flag: .. 
ipython:: python + :okwarning: pd.to_datetime(["04-01-2012 10:00"], dayfirst=True) From e6557c745bff44633cc05c1b7c95cb70b70f96bc Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 6 Aug 2021 14:31:37 +0100 Subject: [PATCH 10/26] :art: --- pandas/_libs/tslibs/parsing.pyx | 41 ++++++++++++++------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index f10e29286b6e3..cfa16df367bce 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -82,6 +82,11 @@ class DateParseError(ValueError): _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) +PARSING_WARNING_MSG = ( + "Parsing '{date_string}' in {format} format. Provide format " + "or specify infer_datetime_format=True for consistent parsing." +) + cdef: set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} @@ -175,38 +180,26 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: day, month = month, day swapped_day_and_month = True - if PY_VERSION_HEX >= 0x03060100: - # In Python <= 3.6.0 there is no range checking for invalid dates - # in C api, thus we call faster C version for 3.6.1 or newer - - if dayfirst and not swapped_day_and_month: - warnings.warn( - f"Parsing '{date_string}' in MM/DD/YYYY format. Provide format " - "or specify infer_datetime_format=True for consistent parsing.", - stacklevel=4, - ) - elif not dayfirst and swapped_day_and_month: - warnings.warn( - f"Parsing '{date_string}' in DD/MM/YYYY format. Provide format " - "or specify infer_datetime_format=True for consistent parsing.", - stacklevel=4, - ) - - return datetime_new(year, month, day, 0, 0, 0, 0, None), reso - if dayfirst and not swapped_day_and_month: warnings.warn( - f"Parsing '{date_string}' in MM/DD/YYYY format. 
Provide format or " - "specify infer_datetime_format=True for consistent parsing.", + PARSING_WARNING_MSG.format( + date_string=date_string, + format='MM/DD/YYYY' + ), stacklevel=4, ) elif not dayfirst and swapped_day_and_month: warnings.warn( - f"Parsing '{date_string}' in DD/MM/YYYY format. Provide format or " - "specify infer_datetime_format=True for consistent parsing.", + PARSING_WARNING_MSG.format( + date_string=date_string, + format='DD/MM/YYYY' + ), stacklevel=4, ) - + if PY_VERSION_HEX >= 0x03060100: + # In Python <= 3.6.0 there is no range checking for invalid dates + # in C api, thus we call faster C version for 3.6.1 or newer + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso return datetime(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError(f"Invalid date specified ({month}/{day})") From ee6fbde5554b30f522f6e37db1f43a199b5a24d1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 6 Aug 2021 15:30:18 +0100 Subject: [PATCH 11/26] catch warnings --- pandas/tests/io/parser/test_parse_dates.py | 39 ++++++++++++++++++---- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index c7b5efa5bf0c9..754b1dfe7c2b9 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -8,6 +8,7 @@ datetime, ) from io import StringIO +import warnings from dateutil.parser import parse as du_parse from hypothesis import ( @@ -1556,16 +1557,16 @@ def test_invalid_parse_delimited_date(all_parsers, date_string): "date_string,dayfirst,expected", [ # %d/%m/%Y; month > 12 thus replacement - ("13/02/2019", False, datetime(2019, 2, 13)), ("13/02/2019", True, datetime(2019, 2, 13)), # %m/%d/%Y; day > 12 thus there will be no replacement ("02/13/2019", False, datetime(2019, 2, 13)), - ("02/13/2019", True, datetime(2019, 2, 13)), # %d/%m/%Y; dayfirst==True thus replacement ("04/02/2019", True, datetime(2019, 2, 4)), ], ) 
-def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected): +def test_parse_delimited_date_swap_no_warning( + all_parsers, date_string, dayfirst, expected +): parser = all_parsers expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") result = parser.read_csv( @@ -1574,6 +1575,30 @@ def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected) tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "date_string,dayfirst,expected", + [ + # %d/%m/%Y; month > 12 thus replacement + ("13/02/2019", False, datetime(2019, 2, 13)), + # %m/%d/%Y; day > 12 thus there will be no replacement + ("02/13/2019", True, datetime(2019, 2, 13)), + ], +) +def test_parse_delimited_date_swap_with_warning( + all_parsers, date_string, dayfirst, expected +): + parser = all_parsers + expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") + warning_msg = ( + "Provide format or specify infer_datetime_format=True for consistent parsing" + ) + with tm.assert_produces_warning(UserWarning, match=warning_msg): + result = parser.read_csv( + StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0] + ) + tm.assert_frame_equal(result, expected) + + def _helper_hypothesis_delimited_date(call, date_string, **kwargs): msg, result = None, None try: @@ -1602,9 +1627,11 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti except_in_dateutil, except_out_dateutil = None, None date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) - except_out_dateutil, result = _helper_hypothesis_delimited_date( - parse_datetime_string, date_string, dayfirst=dayfirst - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + except_out_dateutil, result = _helper_hypothesis_delimited_date( + parse_datetime_string, date_string, dayfirst=dayfirst + ) except_in_dateutil, expected = _helper_hypothesis_delimited_date( du_parse, date_string, From 
15797a8258beb9b4b6bb69090cb14c994bd91017 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 6 Aug 2021 16:23:47 +0100 Subject: [PATCH 12/26] fixup --- pandas/tests/scalar/period/test_period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 3cc81ef851306..7e6c2a452f1a0 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -572,7 +572,7 @@ def test_to_timestamp_tz_arg(self, tzstr): with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="A").to_timestamp(freq="A", tz=tzstr) - exp = Timestamp("31/12/2005", tz="UTC").tz_convert(tzstr) + exp = Timestamp(day=31, month=12, year=2005, tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) assert p == exp From 07834ed952da5bef87576839a70e22c41e163b02 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 8 Aug 2021 09:33:11 +0100 Subject: [PATCH 13/26] add to to_datetime docstring, add whatsnew note --- doc/source/whatsnew/v1.4.0.rst | 16 +++++++++++++--- pandas/core/tools/datetimes.py | 18 ++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fa9c424351b00..aa71a4853a97a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -50,10 +50,19 @@ Notable bug fixes These are bug fixes that might have notable behavior changes. -.. _whatsnew_140.notable_bug_fixes.notable_bug_fix1: +.. _whatsnew_140.notable_bug_fixes.inconsistent_date_string_parsing: -notable_bug_fix1 -^^^^^^^^^^^^^^^^ +Inconsistent date string parsing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``dayfirst`` option of :func:`to_datetime` isn't strict, and this can lead to surprising behaviour: + +.. 
ipython:: python + :okwarning: + + pd.to_datetime(["31-12-2021"], dayfirst=False) + +Now, a warning will be raised if a date string cannot be parsed in accordance with the given ``dayfirst`` value. .. _whatsnew_140.notable_bug_fixes.notable_bug_fix2: @@ -196,6 +205,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`) +- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised (:issue:`12585`) - Timedelta diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 26349a3b2c6c1..f73cab0d8ec22 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -701,8 +701,15 @@ def to_datetime( Specify a date parse order if `arg` is str or its list-likes. If True, parses dates with the day first, eg 10/11/12 is parsed as 2012-11-10. - Warning: dayfirst=True is not strict, but will prefer to parse - with day first (this is a known bug, based on dateutil behavior). + + .. warning:: + + dayfirst=True is not strict, but will prefer to parse + with day first (this is a known bug, based on dateutil behavior). + If a date string cannot be parsed in accordance with the given + `dayfirst` option (e.g. ``to_datetime(['31-12-2021'], dayfirst=False)``) + then a warning will be shown. + yearfirst : bool, default False Specify a date parse order if `arg` is str or its list-likes. @@ -711,8 +718,11 @@ def to_datetime( - If both dayfirst and yearfirst are True, yearfirst is preceded (same as dateutil). - Warning: yearfirst=True is not strict, but will prefer to parse - with year first (this is a known bug, based on dateutil behavior). + .. warning:: + + yearfirst=True is not strict, but will prefer to parse + with year first (this is a known bug, based on dateutil behavior). 
+ utc : bool, default None Return UTC DatetimeIndex if True (converting any tz-aware datetime.datetime objects as well). From 1d08ae936f57fe31737f853ed8d8ecdb1bcd44fd Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 21 Aug 2021 19:27:40 +0100 Subject: [PATCH 14/26] wip --- pandas/_libs/tslibs/parsing.pyx | 10 +++++++++- pandas/tests/io/parser/test_parse_dates.py | 12 +++++++++--- pandas/tests/scalar/period/test_period.py | 14 +++++++------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index cfa16df367bce..048e1b2d96f12 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -39,7 +39,7 @@ cnp.import_array() from dateutil.parser import ( DEFAULTPARSER, - parse as du_parse, + parse as du_parse_raw, ) from dateutil.relativedelta import relativedelta from dateutil.tz import ( @@ -238,6 +238,14 @@ cdef inline bint does_string_look_like_time(str parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 +def du_parse(*args, **kwargs): + warnings.warn( + "Parsing datetime strings without a format specified, " + "please specify a format to avoid unexpected results" + ) + return du_parse_raw(*args, **kwargs) + + def parse_datetime_string( str date_string, bint dayfirst=False, diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 754b1dfe7c2b9..a108b13aebf2c 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -439,7 +439,9 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning): parse_dates = {"actual": [1, 2], "nominal": [1, 3]} parser = all_parsers - with tm.assert_produces_warning(warning, check_stacklevel=False): + with tm.assert_produces_warning( + UserWarning, check_stacklevel=False, raise_on_extra_warnings=False + ): result = parser.read_csv( StringIO(data), header=None, @@ -1239,7 +1241,9 @@ def 
test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni 2001-01-06, 00:00:00, 1.0, 11. """ parser = all_parsers - with tm.assert_produces_warning(warning, check_stacklevel=False): + with tm.assert_produces_warning( + warning, check_stacklevel=False, raise_on_extra_warnings=False + ): result = parser.read_csv( StringIO(data), header=[0, 1], @@ -1333,7 +1337,9 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni ) def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning): parser = all_parsers - with tm.assert_produces_warning(warning, check_stacklevel=False): + with tm.assert_produces_warning( + warning, check_stacklevel=False, raise_on_extra_warnings=False + ): result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs) # Python can sometimes be flaky about how diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 7e6c2a452f1a0..0c0d5e1597582 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -552,7 +552,7 @@ def test_hash(self): @pytest.mark.parametrize("tzstr", ["Europe/Brussels", "Asia/Tokyo", "US/Pacific"]) def test_to_timestamp_tz_arg(self, tzstr): # GH#34522 tz kwarg deprecated - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): p = Period("1/1/2005", freq="M").to_timestamp(tz=tzstr) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -561,7 +561,7 @@ def test_to_timestamp_tz_arg(self, tzstr): assert p.tz == exp_zone.tzinfo assert p.tz == exp.tz - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): p = Period("1/1/2005", freq="3H").to_timestamp(tz=tzstr) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -570,7 +570,7 @@ def 
test_to_timestamp_tz_arg(self, tzstr): assert p.tz == exp_zone.tzinfo assert p.tz == exp.tz - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): p = Period("1/1/2005", freq="A").to_timestamp(freq="A", tz=tzstr) exp = Timestamp(day=31, month=12, year=2005, tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -579,7 +579,7 @@ def test_to_timestamp_tz_arg(self, tzstr): assert p.tz == exp_zone.tzinfo assert p.tz == exp.tz - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): p = Period("1/1/2005", freq="A").to_timestamp(freq="3H", tz=tzstr) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -594,14 +594,14 @@ def test_to_timestamp_tz_arg(self, tzstr): ) def test_to_timestamp_tz_arg_dateutil(self, tzstr): tz = maybe_get_tz(tzstr) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): p = Period("1/1/2005", freq="M").to_timestamp(tz=tz) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) assert p == exp assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1]) assert p.tz == exp.tz - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): p = Period("1/1/2005", freq="M").to_timestamp(freq="3H", tz=tz) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) assert p == exp @@ -609,7 +609,7 @@ def test_to_timestamp_tz_arg_dateutil(self, tzstr): assert p.tz == exp.tz def test_to_timestamp_tz_arg_dateutil_from_string(self): - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): p = Period("1/1/2005", freq="M").to_timestamp(tz="dateutil/Europe/Brussels") assert p.tz == dateutil_gettz("Europe/Brussels") From c4c87bcf733b0aa65ac0effb6d6122ce6e3a392a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 21 
Aug 2021 19:30:12 +0100 Subject: [PATCH 15/26] wip --- pandas/tests/io/parser/test_parse_dates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index a108b13aebf2c..2732769689d7e 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -440,7 +440,7 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning): parser = all_parsers with tm.assert_produces_warning( - UserWarning, check_stacklevel=False, raise_on_extra_warnings=False + (FutureWarning, UserWarning), check_stacklevel=False ): result = parser.read_csv( StringIO(data), @@ -1242,7 +1242,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni """ parser = all_parsers with tm.assert_produces_warning( - warning, check_stacklevel=False, raise_on_extra_warnings=False + (FutureWarning, UserWarning), check_stacklevel=False ): result = parser.read_csv( StringIO(data), @@ -1338,7 +1338,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning): parser = all_parsers with tm.assert_produces_warning( - warning, check_stacklevel=False, raise_on_extra_warnings=False + (FutureWarning, UserWarning), check_stacklevel=False ): result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs) From c4e282dcd2e988e451d415520226eace4e5ef5aa Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 21 Aug 2021 19:45:41 +0100 Subject: [PATCH 16/26] wip --- pandas/tests/io/test_date_converters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py index a9fa27e091714..8a59a8fb86358 100644 --- a/pandas/tests/io/test_date_converters.py +++ b/pandas/tests/io/test_date_converters.py @@ -12,7 +12,7 @@ def 
test_parse_date_time(): dates = np.array(["2007/1/3", "2008/2/4"], dtype=object) times = np.array(["05:07:09", "06:08:00"], dtype=object) expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning((FutureWarning, UserWarning)): result = conv.parse_date_time(dates, times) tm.assert_numpy_array_equal(result, expected) From 44a0533df2ddb692f6c967f0094808126729aa28 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 21 Aug 2021 20:16:34 +0100 Subject: [PATCH 17/26] wip --- pandas/tests/plotting/frame/test_frame_subplots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index fa4a132001be5..2b51a4b874e7f 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -259,7 +259,7 @@ def test_subplots_warnings(self): df.plot(subplots=True, layout=(3, 2)) df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + np.random.randn(100, 4), index=date_range("01/01/2000", periods=100) ) df.plot(subplots=True, layout=(3, 2)) @@ -430,7 +430,7 @@ def test_df_subplots_patterns_minorticks(self): df = DataFrame( np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), + index=date_range("01/01/2000", periods=10), columns=list("AB"), ) From 5362670f6a3243d16584cc7c687779ecec500729 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 14:58:30 +0100 Subject: [PATCH 18/26] fixup test --- pandas/_libs/tslibs/parsing.pyx | 39 +++++++--- .../tests/frame/methods/test_reset_index.py | 76 ++++++++++++++++--- 2 files changed, 93 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 048e1b2d96f12..f5d67bb2e062f 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ 
b/pandas/_libs/tslibs/parsing.pyx @@ -39,7 +39,7 @@ cnp.import_array() from dateutil.parser import ( DEFAULTPARSER, - parse as du_parse_raw, + parse as du_parse, ) from dateutil.relativedelta import relativedelta from dateutil.tz import ( @@ -238,12 +238,12 @@ cdef inline bint does_string_look_like_time(str parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 -def du_parse(*args, **kwargs): +def du_parse_with_warning(*args, **kwargs): warnings.warn( "Parsing datetime strings without a format specified, " "please specify a format to avoid unexpected results" ) - return du_parse_raw(*args, **kwargs) + return du_parse(*args, **kwargs) def parse_datetime_string( @@ -269,8 +269,12 @@ def parse_datetime_string( if does_string_look_like_time(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME - dt = du_parse(date_string, dayfirst=dayfirst, - yearfirst=yearfirst, **kwargs) + dt = du_parse_with_warning( + date_string, + dayfirst=dayfirst, + yearfirst=yearfirst, + **kwargs, + ) return dt dt, _ = _parse_delimited_date(date_string, dayfirst) @@ -286,8 +290,13 @@ def parse_datetime_string( pass try: - dt = du_parse(date_string, default=_DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) + dt = du_parse_with_warning( + date_string, + default=_DEFAULT_DATETIME, + dayfirst=dayfirst, + yearfirst=yearfirst, + **kwargs, + ) except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable @@ -641,7 +650,11 @@ def try_parse_dates( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + parse_date = lambda x: du_parse_with_warning( + x, + dayfirst=dayfirst, + default=default, + ) # EAFP here try: @@ -688,13 +701,17 @@ def try_parse_date_and_time( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + parse_date = lambda x: 
du_parse_with_warning( + x, + dayfirst=dayfirst, + default=default, + ) else: parse_date = date_parser if time_parser is None: - parse_time = lambda x: du_parse(x) + parse_time = lambda x: du_parse_with_warning(x) else: parse_time = time_parser @@ -868,7 +885,7 @@ def format_is_iso(f: str) -> bint: def guess_datetime_format( dt_str, bint dayfirst=False, - dt_str_parse=du_parse, + dt_str_parse=du_parse_with_warning, dt_str_split=_DATEUTIL_LEXER_SPLIT, ): """ diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 76d259707787d..18c147cf7f160 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -54,7 +54,7 @@ def test_reset_index_tz(self, tz_aware_fixture): # GH 3950 # reset_index with single level tz = tz_aware_fixture - idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") + idx = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx") df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) expected = DataFrame( @@ -320,28 +320,82 @@ def test_reset_index_multiindex_nan(self): [ None, "foo", + ], + ) + def test_reset_index_with_datetimeindex_cols_with_user_warning(self, name): + # GH#5818 + df = DataFrame( + [[1, 2], [3, 4]], + columns=date_range("01/01/2013", "01/02/2013"), + index=["A", "B"], + ) + df.index.name = name + + with tm.assert_produces_warning(UserWarning): + result = df.reset_index() + + item = name if name is not None else "index" + columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) + if isinstance(item, str) and item == "2012-12-31": + columns = columns.astype("datetime64[ns]") + else: + assert columns.dtype == object + + expected = DataFrame( + [["A", 1, 2], ["B", 3, 4]], + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "name", + [ 2, 3.0, pd.Timedelta(6), - Timestamp("2012-12-30", tz="UTC"), "2012-12-31", ], ) def 
test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 - warn = None - if isinstance(name, Timestamp) and name.tz is not None: - # _deprecate_mismatched_indexing - warn = FutureWarning + df = DataFrame( + [[1, 2], [3, 4]], + columns=date_range("01/01/2013", "01/02/2013"), + index=["A", "B"], + ) + df.index.name = name + + result = df.reset_index() + + item = name if name is not None else "index" + columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) + if isinstance(item, str) and item == "2012-12-31": + columns = columns.astype("datetime64[ns]") + else: + assert columns.dtype == object + expected = DataFrame( + [["A", 1, 2], ["B", 3, 4]], + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "name", + [ + Timestamp("2012-12-30", tz="UTC"), + ], + ) + def test_reset_index_with_datetimeindex_cols_with_future_warning(self, name): + # GH#5818 df = DataFrame( [[1, 2], [3, 4]], - columns=date_range("1/1/2013", "1/2/2013"), + columns=date_range("01/01/2013", "01/02/2013"), index=["A", "B"], ) df.index.name = name - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning((FutureWarning, UserWarning)): result = df.reset_index() item = name if name is not None else "index" @@ -425,7 +479,7 @@ def test_reset_index_multiindex_columns(self): def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 tz = tz_naive_fixture - idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx1 = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx1") idx2 = Index(range(5), name="idx2", dtype="int64") idx = MultiIndex.from_arrays([idx1, idx2]) df = DataFrame( @@ -453,7 +507,7 @@ def test_reset_index_datetime(self, tz_naive_fixture): tm.assert_frame_equal(df.reset_index(), expected) idx3 = date_range( - "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + "01/01/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" ) idx = MultiIndex.from_arrays([idx1, 
idx2, idx3]) df = DataFrame( @@ -615,7 +669,7 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes): [ (["a", "b"], object), ( - pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), + pd.period_range("12-01-2000", periods=2, freq="Q-DEC"), pd.PeriodDtype(freq="Q-DEC"), ), ], From 6b43118339b56c9f9f7480c49c8c4a04a76775c9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 16:38:12 +0100 Subject: [PATCH 19/26] more fixups --- pandas/tests/tslibs/test_array_to_datetime.py | 87 ++++++++++++++++--- pandas/tests/tslibs/test_parsing.py | 21 ++++- 2 files changed, 94 insertions(+), 14 deletions(-) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 8c2f0b09c461e..fe8ad9408d933 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -28,6 +28,19 @@ "2013-01-02T00:00:00.000000000-0000", ], ), + ], +) +def test_parsing_valid_dates(data, expected): + arr = np.array(data, dtype=object) + result, _ = tslib.array_to_datetime(arr) + + expected = np_array_datetime64_compat(expected, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data,expected", + [ ( ["Mon Sep 16 2013", "Tue Sep 17 2013"], [ @@ -37,9 +50,10 @@ ), ], ) -def test_parsing_valid_dates(data, expected): +def test_parsing_valid_dates_with_user_warning(data, expected): arr = np.array(data, dtype=object) - result, _ = tslib.array_to_datetime(arr) + with tm.assert_produces_warning(UserWarning): + result, _ = tslib.array_to_datetime(arr) expected = np_array_datetime64_compat(expected, dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected) @@ -48,17 +62,16 @@ def test_parsing_valid_dates(data, expected): @pytest.mark.parametrize( "dt_string, expected_tz", [ - ["01-01-2013 08:00:00+08:00", 480], ["2013-01-01T08:00:00.000000000+0800", 480], ["2012-12-31T16:00:00.000000000-0800", -480], - ["12-31-2012 
23:00:00-01:00", -60], ], ) def test_parsing_timezone_offsets(dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added. arr = np.array(["01-01-2013 00:00:00"], dtype=object) - expected, _ = tslib.array_to_datetime(arr) + with tm.assert_produces_warning(UserWarning): + expected, _ = tslib.array_to_datetime(arr) arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) @@ -67,11 +80,34 @@ def test_parsing_timezone_offsets(dt_string, expected_tz): assert result_tz is pytz.FixedOffset(expected_tz) +@pytest.mark.parametrize( + "dt_string, expected_tz", + [ + ["01-01-2013 08:00:00+08:00", 480], + ["12-31-2012 23:00:00-01:00", -60], + ], +) +def test_parsing_timezone_offsets_with_two_user_warnings(dt_string, expected_tz): + # All of these datetime strings with offsets are equivalent + # to the same datetime after the timezone offset is added. + arr = np.array(["01-01-2013 00:00:00"], dtype=object) + with tm.assert_produces_warning(UserWarning): + expected, _ = tslib.array_to_datetime(arr) + + arr = np.array([dt_string], dtype=object) + with tm.assert_produces_warning(UserWarning): + result, result_tz = tslib.array_to_datetime(arr) + + tm.assert_numpy_array_equal(result, expected) + assert result_tz is pytz.FixedOffset(expected_tz) + + def test_parsing_non_iso_timezone_offset(): dt_string = "01-01-2013T00:00:00.000000000+0000" arr = np.array([dt_string], dtype=object) - result, result_tz = tslib.array_to_datetime(arr) + with tm.assert_produces_warning(UserWarning): + result, result_tz = tslib.array_to_datetime(arr) expected = np.array([np.datetime64("2013-01-01 00:00:00.000000000")]) tm.assert_numpy_array_equal(result, expected) @@ -83,7 +119,8 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - result, result_tz = tslib.array_to_datetime(data) + with 
tm.assert_produces_warning(UserWarning): + result, result_tz = tslib.array_to_datetime(data) expected = np.array( [ datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), @@ -116,7 +153,6 @@ def test_number_looking_strings_not_into_datetime(data): date(1000, 1, 1), datetime(1000, 1, 1), "1000-01-01", - "Jan 1, 1000", np.datetime64("1000-01-01"), ], ) @@ -137,9 +173,36 @@ def test_coerce_outside_ns_bounds(invalid_date, errors): tm.assert_numpy_array_equal(result, expected) +@pytest.mark.parametrize( + "invalid_date", + [ + "Jan 1, 1000", + ], +) +@pytest.mark.parametrize("errors", ["coerce", "raise"]) +def test_coerce_outside_ns_bounds_with_user_warning(invalid_date, errors): + arr = np.array([invalid_date], dtype="object") + kwargs = {"values": arr, "errors": errors} + + if errors == "raise": + msg = "Out of bounds nanosecond timestamp" + + with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( + UserWarning + ): + tslib.array_to_datetime(**kwargs) + else: # coerce. + with tm.assert_produces_warning(UserWarning): + result, _ = tslib.array_to_datetime(**kwargs) + expected = np.array([iNaT], dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + def test_coerce_outside_ns_bounds_one_valid(): arr = np.array(["1/1/1000", "1/1/2000"], dtype=object) - result, _ = tslib.array_to_datetime(arr, errors="coerce") + with tm.assert_produces_warning(UserWarning): + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = [iNaT, "2000-01-01T00:00:00.000000000-0000"] expected = np_array_datetime64_compat(expected, dtype="M8[ns]") @@ -155,11 +218,13 @@ def test_coerce_of_invalid_datetimes(errors): if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. - result, _ = tslib.array_to_datetime(**kwargs) + with tm.assert_produces_warning(UserWarning): + result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. 
# With coercing, the invalid dates becomes iNaT - result, _ = tslib.array_to_datetime(arr, errors="coerce") + with tm.assert_produces_warning(UserWarning): + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT] tm.assert_numpy_array_equal( diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index e580b9112f3ec..683b377776e2d 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -155,7 +155,8 @@ def test_guess_datetime_format_with_parseable_formats(string, fmt): @pytest.mark.parametrize("dayfirst,expected", [(True, "%d/%m/%Y"), (False, "%m/%d/%Y")]) def test_guess_datetime_format_with_dayfirst(dayfirst, expected): ambiguous_string = "01/01/2011" - result = parsing.guess_datetime_format(ambiguous_string, dayfirst=dayfirst) + with tm.assert_produces_warning(UserWarning): + result = parsing.guess_datetime_format(ambiguous_string, dayfirst=dayfirst) assert result == expected @@ -182,6 +183,18 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt): "1/1/1/1", "this_is_not_a_datetime", "51a", + ], +) +def test_guess_datetime_format_invalid_inputs_with_user_warning(invalid_dt): + # A datetime string must include a year, month and a day for it to be + # guessable, in addition to being a string that looks like a datetime. 
+ with tm.assert_produces_warning(UserWarning): + assert parsing.guess_datetime_format(invalid_dt) is None + + +@pytest.mark.parametrize( + "invalid_dt", + [ 9, datetime(2011, 1, 1), ], @@ -205,13 +218,15 @@ def test_guess_datetime_format_invalid_inputs(invalid_dt): ) def test_guess_datetime_format_no_padding(string, fmt): # see gh-11142 - result = parsing.guess_datetime_format(string) + with tm.assert_produces_warning(UserWarning): + result = parsing.guess_datetime_format(string) assert result == fmt def test_try_parse_dates(): arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object) - result = parsing.try_parse_dates(arr, dayfirst=True) + with tm.assert_produces_warning(UserWarning): + result = parsing.try_parse_dates(arr, dayfirst=True) expected = np.array([parse(d, dayfirst=True) for d in arr]) tm.assert_numpy_array_equal(result, expected) From 700881d31b751d41dd5f42aeb37e0346fdeb2839 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 18:14:21 +0100 Subject: [PATCH 20/26] fixup --- doc/source/user_guide/timedeltas.rst | 2 ++ pandas/_libs/tslibs/parsing.pyx | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 180de1df53f9e..b0e0d72d05f88 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -424,6 +424,7 @@ Similarly to other of the datetime-like indices, ``DatetimeIndex`` and ``PeriodI Selections work similarly, with coercion on string-likes and slices: .. ipython:: python + :okwarning: s["1 day":"2 day"] s["1 day 01:00:00"] @@ -432,6 +433,7 @@ Selections work similarly, with coercion on string-likes and slices: Furthermore you can use partial string selection and the range will be inferred: .. 
ipython:: python + :okwarning: s["1 day":"1 day 5 hours"] diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index f5d67bb2e062f..713b7be15654d 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -241,7 +241,8 @@ cdef inline bint does_string_look_like_time(str parse_string): def du_parse_with_warning(*args, **kwargs): warnings.warn( "Parsing datetime strings without a format specified, " - "please specify a format to avoid unexpected results" + "please specify a format to avoid unexpected results", + stacklevel=4, ) return du_parse(*args, **kwargs) From bd893a2bb9d9d04d57ae0463b635c05b57f540fb Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 19:08:36 +0100 Subject: [PATCH 21/26] revert to b4bb5b330ad25c7dbca36fe55d4c264ec4d027d1 --- doc/source/user_guide/timedeltas.rst | 2 - pandas/_libs/tslibs/parsing.pyx | 42 ++------- .../tests/frame/methods/test_reset_index.py | 76 +++------------- pandas/tests/io/parser/test_parse_dates.py | 12 +-- pandas/tests/io/test_date_converters.py | 2 +- .../plotting/frame/test_frame_subplots.py | 4 +- pandas/tests/scalar/period/test_period.py | 14 +-- pandas/tests/tslibs/test_array_to_datetime.py | 87 +++---------------- pandas/tests/tslibs/test_parsing.py | 21 +---- 9 files changed, 46 insertions(+), 214 deletions(-) diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index b0e0d72d05f88..180de1df53f9e 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -424,7 +424,6 @@ Similarly to other of the datetime-like indices, ``DatetimeIndex`` and ``PeriodI Selections work similarly, with coercion on string-likes and slices: .. ipython:: python - :okwarning: s["1 day":"2 day"] s["1 day 01:00:00"] @@ -433,7 +432,6 @@ Selections work similarly, with coercion on string-likes and slices: Furthermore you can use partial string selection and the range will be inferred: .. 
ipython:: python - :okwarning: s["1 day":"1 day 5 hours"] diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 713b7be15654d..cfa16df367bce 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -238,15 +238,6 @@ cdef inline bint does_string_look_like_time(str parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 -def du_parse_with_warning(*args, **kwargs): - warnings.warn( - "Parsing datetime strings without a format specified, " - "please specify a format to avoid unexpected results", - stacklevel=4, - ) - return du_parse(*args, **kwargs) - - def parse_datetime_string( str date_string, bint dayfirst=False, @@ -270,12 +261,8 @@ def parse_datetime_string( if does_string_look_like_time(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME - dt = du_parse_with_warning( - date_string, - dayfirst=dayfirst, - yearfirst=yearfirst, - **kwargs, - ) + dt = du_parse(date_string, dayfirst=dayfirst, + yearfirst=yearfirst, **kwargs) return dt dt, _ = _parse_delimited_date(date_string, dayfirst) @@ -291,13 +278,8 @@ def parse_datetime_string( pass try: - dt = du_parse_with_warning( - date_string, - default=_DEFAULT_DATETIME, - dayfirst=dayfirst, - yearfirst=yearfirst, - **kwargs, - ) + dt = du_parse(date_string, default=_DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable @@ -651,11 +633,7 @@ def try_parse_dates( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse_with_warning( - x, - dayfirst=dayfirst, - default=default, - ) + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) # EAFP here try: @@ -702,17 +680,13 @@ def try_parse_date_and_time( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse_with_warning( - x, - dayfirst=dayfirst, - default=default, 
- ) + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) else: parse_date = date_parser if time_parser is None: - parse_time = lambda x: du_parse_with_warning(x) + parse_time = lambda x: du_parse(x) else: parse_time = time_parser @@ -886,7 +860,7 @@ def format_is_iso(f: str) -> bint: def guess_datetime_format( dt_str, bint dayfirst=False, - dt_str_parse=du_parse_with_warning, + dt_str_parse=du_parse, dt_str_split=_DATEUTIL_LEXER_SPLIT, ): """ diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 18c147cf7f160..76d259707787d 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -54,7 +54,7 @@ def test_reset_index_tz(self, tz_aware_fixture): # GH 3950 # reset_index with single level tz = tz_aware_fixture - idx = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx") + idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) expected = DataFrame( @@ -320,82 +320,28 @@ def test_reset_index_multiindex_nan(self): [ None, "foo", - ], - ) - def test_reset_index_with_datetimeindex_cols_with_user_warning(self, name): - # GH#5818 - df = DataFrame( - [[1, 2], [3, 4]], - columns=date_range("01/01/2013", "01/02/2013"), - index=["A", "B"], - ) - df.index.name = name - - with tm.assert_produces_warning(UserWarning): - result = df.reset_index() - - item = name if name is not None else "index" - columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) - if isinstance(item, str) and item == "2012-12-31": - columns = columns.astype("datetime64[ns]") - else: - assert columns.dtype == object - - expected = DataFrame( - [["A", 1, 2], ["B", 3, 4]], - columns=columns, - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "name", - [ 2, 3.0, pd.Timedelta(6), + Timestamp("2012-12-30", tz="UTC"), "2012-12-31", ], ) def 
test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 - df = DataFrame( - [[1, 2], [3, 4]], - columns=date_range("01/01/2013", "01/02/2013"), - index=["A", "B"], - ) - df.index.name = name - - result = df.reset_index() - - item = name if name is not None else "index" - columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) - if isinstance(item, str) and item == "2012-12-31": - columns = columns.astype("datetime64[ns]") - else: - assert columns.dtype == object + warn = None + if isinstance(name, Timestamp) and name.tz is not None: + # _deprecate_mismatched_indexing + warn = FutureWarning - expected = DataFrame( - [["A", 1, 2], ["B", 3, 4]], - columns=columns, - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "name", - [ - Timestamp("2012-12-30", tz="UTC"), - ], - ) - def test_reset_index_with_datetimeindex_cols_with_future_warning(self, name): - # GH#5818 df = DataFrame( [[1, 2], [3, 4]], - columns=date_range("01/01/2013", "01/02/2013"), + columns=date_range("1/1/2013", "1/2/2013"), index=["A", "B"], ) df.index.name = name - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(warn): result = df.reset_index() item = name if name is not None else "index" @@ -479,7 +425,7 @@ def test_reset_index_multiindex_columns(self): def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 tz = tz_naive_fixture - idx1 = date_range("01/01/2011", periods=5, freq="D", tz=tz, name="idx1") + idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") idx2 = Index(range(5), name="idx2", dtype="int64") idx = MultiIndex.from_arrays([idx1, idx2]) df = DataFrame( @@ -507,7 +453,7 @@ def test_reset_index_datetime(self, tz_naive_fixture): tm.assert_frame_equal(df.reset_index(), expected) idx3 = date_range( - "01/01/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" ) idx = MultiIndex.from_arrays([idx1, 
idx2, idx3]) df = DataFrame( @@ -669,7 +615,7 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes): [ (["a", "b"], object), ( - pd.period_range("12-01-2000", periods=2, freq="Q-DEC"), + pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), pd.PeriodDtype(freq="Q-DEC"), ), ], diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2732769689d7e..754b1dfe7c2b9 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -439,9 +439,7 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning): parse_dates = {"actual": [1, 2], "nominal": [1, 3]} parser = all_parsers - with tm.assert_produces_warning( - (FutureWarning, UserWarning), check_stacklevel=False - ): + with tm.assert_produces_warning(warning, check_stacklevel=False): result = parser.read_csv( StringIO(data), header=None, @@ -1241,9 +1239,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni 2001-01-06, 00:00:00, 1.0, 11. 
""" parser = all_parsers - with tm.assert_produces_warning( - (FutureWarning, UserWarning), check_stacklevel=False - ): + with tm.assert_produces_warning(warning, check_stacklevel=False): result = parser.read_csv( StringIO(data), header=[0, 1], @@ -1337,9 +1333,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni ) def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning): parser = all_parsers - with tm.assert_produces_warning( - (FutureWarning, UserWarning), check_stacklevel=False - ): + with tm.assert_produces_warning(warning, check_stacklevel=False): result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs) # Python can sometimes be flaky about how diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py index 8a59a8fb86358..a9fa27e091714 100644 --- a/pandas/tests/io/test_date_converters.py +++ b/pandas/tests/io/test_date_converters.py @@ -12,7 +12,7 @@ def test_parse_date_time(): dates = np.array(["2007/1/3", "2008/2/4"], dtype=object) times = np.array(["05:07:09", "06:08:00"], dtype=object) expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): result = conv.parse_date_time(dates, times) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 2b51a4b874e7f..fa4a132001be5 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -259,7 +259,7 @@ def test_subplots_warnings(self): df.plot(subplots=True, layout=(3, 2)) df = DataFrame( - np.random.randn(100, 4), index=date_range("01/01/2000", periods=100) + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) ) df.plot(subplots=True, layout=(3, 2)) @@ -430,7 +430,7 @@ def 
test_df_subplots_patterns_minorticks(self): df = DataFrame( np.random.randn(10, 2), - index=date_range("01/01/2000", periods=10), + index=date_range("1/1/2000", periods=10), columns=list("AB"), ) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 0c0d5e1597582..7e6c2a452f1a0 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -552,7 +552,7 @@ def test_hash(self): @pytest.mark.parametrize("tzstr", ["Europe/Brussels", "Asia/Tokyo", "US/Pacific"]) def test_to_timestamp_tz_arg(self, tzstr): # GH#34522 tz kwarg deprecated - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="M").to_timestamp(tz=tzstr) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -561,7 +561,7 @@ def test_to_timestamp_tz_arg(self, tzstr): assert p.tz == exp_zone.tzinfo assert p.tz == exp.tz - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="3H").to_timestamp(tz=tzstr) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -570,7 +570,7 @@ def test_to_timestamp_tz_arg(self, tzstr): assert p.tz == exp_zone.tzinfo assert p.tz == exp.tz - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="A").to_timestamp(freq="A", tz=tzstr) exp = Timestamp(day=31, month=12, year=2005, tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -579,7 +579,7 @@ def test_to_timestamp_tz_arg(self, tzstr): assert p.tz == exp_zone.tzinfo assert p.tz == exp.tz - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="A").to_timestamp(freq="3H", 
tz=tzstr) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) exp_zone = pytz.timezone(tzstr).normalize(p) @@ -594,14 +594,14 @@ def test_to_timestamp_tz_arg(self, tzstr): ) def test_to_timestamp_tz_arg_dateutil(self, tzstr): tz = maybe_get_tz(tzstr) - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="M").to_timestamp(tz=tz) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) assert p == exp assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1]) assert p.tz == exp.tz - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="M").to_timestamp(freq="3H", tz=tz) exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) assert p == exp @@ -609,7 +609,7 @@ def test_to_timestamp_tz_arg_dateutil(self, tzstr): assert p.tz == exp.tz def test_to_timestamp_tz_arg_dateutil_from_string(self): - with tm.assert_produces_warning((FutureWarning, UserWarning)): + with tm.assert_produces_warning(FutureWarning): p = Period("1/1/2005", freq="M").to_timestamp(tz="dateutil/Europe/Brussels") assert p.tz == dateutil_gettz("Europe/Brussels") diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index fe8ad9408d933..8c2f0b09c461e 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -28,19 +28,6 @@ "2013-01-02T00:00:00.000000000-0000", ], ), - ], -) -def test_parsing_valid_dates(data, expected): - arr = np.array(data, dtype=object) - result, _ = tslib.array_to_datetime(arr) - - expected = np_array_datetime64_compat(expected, dtype="M8[ns]") - tm.assert_numpy_array_equal(result, expected) - - -@pytest.mark.parametrize( - "data,expected", - [ ( ["Mon Sep 16 2013", "Tue Sep 17 2013"], [ @@ -50,10 +37,9 @@ def test_parsing_valid_dates(data, expected): ), ], ) -def 
test_parsing_valid_dates_with_user_warning(data, expected): +def test_parsing_valid_dates(data, expected): arr = np.array(data, dtype=object) - with tm.assert_produces_warning(UserWarning): - result, _ = tslib.array_to_datetime(arr) + result, _ = tslib.array_to_datetime(arr) expected = np_array_datetime64_compat(expected, dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected) @@ -62,16 +48,17 @@ def test_parsing_valid_dates_with_user_warning(data, expected): @pytest.mark.parametrize( "dt_string, expected_tz", [ + ["01-01-2013 08:00:00+08:00", 480], ["2013-01-01T08:00:00.000000000+0800", 480], ["2012-12-31T16:00:00.000000000-0800", -480], + ["12-31-2012 23:00:00-01:00", -60], ], ) def test_parsing_timezone_offsets(dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added. arr = np.array(["01-01-2013 00:00:00"], dtype=object) - with tm.assert_produces_warning(UserWarning): - expected, _ = tslib.array_to_datetime(arr) + expected, _ = tslib.array_to_datetime(arr) arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) @@ -80,34 +67,11 @@ def test_parsing_timezone_offsets(dt_string, expected_tz): assert result_tz is pytz.FixedOffset(expected_tz) -@pytest.mark.parametrize( - "dt_string, expected_tz", - [ - ["01-01-2013 08:00:00+08:00", 480], - ["12-31-2012 23:00:00-01:00", -60], - ], -) -def test_parsing_timezone_offsets_with_two_user_warnings(dt_string, expected_tz): - # All of these datetime strings with offsets are equivalent - # to the same datetime after the timezone offset is added. 
- arr = np.array(["01-01-2013 00:00:00"], dtype=object) - with tm.assert_produces_warning(UserWarning): - expected, _ = tslib.array_to_datetime(arr) - - arr = np.array([dt_string], dtype=object) - with tm.assert_produces_warning(UserWarning): - result, result_tz = tslib.array_to_datetime(arr) - - tm.assert_numpy_array_equal(result, expected) - assert result_tz is pytz.FixedOffset(expected_tz) - - def test_parsing_non_iso_timezone_offset(): dt_string = "01-01-2013T00:00:00.000000000+0000" arr = np.array([dt_string], dtype=object) - with tm.assert_produces_warning(UserWarning): - result, result_tz = tslib.array_to_datetime(arr) + result, result_tz = tslib.array_to_datetime(arr) expected = np.array([np.datetime64("2013-01-01 00:00:00.000000000")]) tm.assert_numpy_array_equal(result, expected) @@ -119,8 +83,7 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - with tm.assert_produces_warning(UserWarning): - result, result_tz = tslib.array_to_datetime(data) + result, result_tz = tslib.array_to_datetime(data) expected = np.array( [ datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), @@ -153,6 +116,7 @@ def test_number_looking_strings_not_into_datetime(data): date(1000, 1, 1), datetime(1000, 1, 1), "1000-01-01", + "Jan 1, 1000", np.datetime64("1000-01-01"), ], ) @@ -173,36 +137,9 @@ def test_coerce_outside_ns_bounds(invalid_date, errors): tm.assert_numpy_array_equal(result, expected) -@pytest.mark.parametrize( - "invalid_date", - [ - "Jan 1, 1000", - ], -) -@pytest.mark.parametrize("errors", ["coerce", "raise"]) -def test_coerce_outside_ns_bounds_with_user_warning(invalid_date, errors): - arr = np.array([invalid_date], dtype="object") - kwargs = {"values": arr, "errors": errors} - - if errors == "raise": - msg = "Out of bounds nanosecond timestamp" - - with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( - UserWarning - ): - 
tslib.array_to_datetime(**kwargs) - else: # coerce. - with tm.assert_produces_warning(UserWarning): - result, _ = tslib.array_to_datetime(**kwargs) - expected = np.array([iNaT], dtype="M8[ns]") - - tm.assert_numpy_array_equal(result, expected) - - def test_coerce_outside_ns_bounds_one_valid(): arr = np.array(["1/1/1000", "1/1/2000"], dtype=object) - with tm.assert_produces_warning(UserWarning): - result, _ = tslib.array_to_datetime(arr, errors="coerce") + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = [iNaT, "2000-01-01T00:00:00.000000000-0000"] expected = np_array_datetime64_compat(expected, dtype="M8[ns]") @@ -218,13 +155,11 @@ def test_coerce_of_invalid_datetimes(errors): if errors == "ignore": # Without coercing, the presence of any invalid # dates prevents any values from being converted. - with tm.assert_produces_warning(UserWarning): - result, _ = tslib.array_to_datetime(**kwargs) + result, _ = tslib.array_to_datetime(**kwargs) tm.assert_numpy_array_equal(result, arr) else: # coerce. 
# With coercing, the invalid dates becomes iNaT - with tm.assert_produces_warning(UserWarning): - result, _ = tslib.array_to_datetime(arr, errors="coerce") + result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000-0000", iNaT, iNaT] tm.assert_numpy_array_equal( diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 683b377776e2d..e580b9112f3ec 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -155,8 +155,7 @@ def test_guess_datetime_format_with_parseable_formats(string, fmt): @pytest.mark.parametrize("dayfirst,expected", [(True, "%d/%m/%Y"), (False, "%m/%d/%Y")]) def test_guess_datetime_format_with_dayfirst(dayfirst, expected): ambiguous_string = "01/01/2011" - with tm.assert_produces_warning(UserWarning): - result = parsing.guess_datetime_format(ambiguous_string, dayfirst=dayfirst) + result = parsing.guess_datetime_format(ambiguous_string, dayfirst=dayfirst) assert result == expected @@ -183,18 +182,6 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt): "1/1/1/1", "this_is_not_a_datetime", "51a", - ], -) -def test_guess_datetime_format_invalid_inputs_with_user_warning(invalid_dt): - # A datetime string must include a year, month and a day for it to be - # guessable, in addition to being a string that looks like a datetime. 
- with tm.assert_produces_warning(UserWarning): - assert parsing.guess_datetime_format(invalid_dt) is None - - -@pytest.mark.parametrize( - "invalid_dt", - [ 9, datetime(2011, 1, 1), ], @@ -218,15 +205,13 @@ def test_guess_datetime_format_invalid_inputs(invalid_dt): ) def test_guess_datetime_format_no_padding(string, fmt): # see gh-11142 - with tm.assert_produces_warning(UserWarning): - result = parsing.guess_datetime_format(string) + result = parsing.guess_datetime_format(string) assert result == fmt def test_try_parse_dates(): arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object) - with tm.assert_produces_warning(UserWarning): - result = parsing.try_parse_dates(arr, dayfirst=True) + result = parsing.try_parse_dates(arr, dayfirst=True) expected = np.array([parse(d, dayfirst=True) for d in arr]) tm.assert_numpy_array_equal(result, expected) From 11049a69d620a3be30da8ad672ba1d9195fa25b2 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 19:16:05 +0100 Subject: [PATCH 22/26] document in timeseries.rst --- doc/source/user_guide/timeseries.rst | 5 +++-- doc/source/whatsnew/v1.4.0.rst | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index a26ae72bf31ca..a112c632ceb25 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -212,9 +212,10 @@ you can pass the ``dayfirst`` flag: .. warning:: - You see in the above example that ``dayfirst`` isn't strict, so if a date + You see in the above example that ``dayfirst`` isn't strict. If a date can't be parsed with the day being first it will be parsed as if - ``dayfirst`` were False. + ``dayfirst`` were False, and in the case of parsing delimited date strings + (e.g. ``31-12-2012``) then a warning will also be raised. If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``. 
``Timestamp`` can also accept string input, but it doesn't accept string parsing diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 89f67ee642973..e3ccd887dd677 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -262,7 +262,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`) -- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised (:issue:`12585`) +- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`) - Timedelta From f6c44da70eadb1afad7eca532d00e3d25e52fe64 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 19:39:40 +0100 Subject: [PATCH 23/26] add tests for read_csv --- pandas/tests/io/parser/test_parse_dates.py | 96 ++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 754b1dfe7c2b9..575fce41c2562 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -40,6 +40,7 @@ from pandas.core.indexes.datetimes import date_range import pandas.io.date_converters as conv +from pandas.io.parsers import read_csv # constant _DEFAULT_DATETIME = datetime(1, 1, 1) @@ -1701,3 +1702,98 @@ def test_date_parser_usecols_thousands(all_parsers): ) expected = DataFrame({"B": [3, 4], "C": [Timestamp("20-09-2001 01:00:00")] * 2}) tm.assert_frame_equal(result, expected) + + +def test_dayfirst_warnings(): + # GH 12585 + warning_msg_day_first = ( + "Parsing '31/12/2014' in DD/MM/YYYY format. 
Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + warning_msg_month_first = ( + "Parsing '03/30/2011' in MM/DD/YYYY format. Provide " + "format or specify infer_datetime_format=True for consistent parsing." + ) + + # CASE 1: valid input + input = "date\n31/12/2014\n10/03/2011" + expected = DatetimeIndex( + ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date" + ) + + # A. dayfirst arg correct, no warning + res1 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date" + ).index + tm.assert_index_equal(expected, res1) + + # B. dayfirst arg incorrect, warning + incorrect output + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res2 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( + UserWarning, match=warning_msg_day_first + ): + tm.assert_index_equal(expected, res2) + + # C. dayfirst default arg, same as B + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res3 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( + UserWarning, match=warning_msg_day_first + ): + tm.assert_index_equal(expected, res3) + + # D. infer_datetime_format=True overrides dayfirst default + # no warning + correct result + res4 = read_csv( + StringIO(input), + parse_dates=["date"], + infer_datetime_format=True, + index_col="date", + ).index + tm.assert_index_equal(expected, res4) + + # CASE 2: invalid input + # cannot consistently process with single format + # warnings *always* raised + + # first in DD/MM/YYYY, second in MM/DD/YYYY + input = "date\n31/12/2014\n03/30/2011" + expected = DatetimeIndex( + ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date" + ) + + # A. 
use dayfirst=True + with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first): + res5 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date" + ).index + tm.assert_index_equal(expected, res5) + + # B. use dayfirst=False + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res6 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + tm.assert_index_equal(expected, res6) + + # C. use dayfirst default arg, same as B + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res7 = read_csv( + StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" + ).index + tm.assert_index_equal(expected, res7) + + # D. use infer_datetime_format=True + with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): + res8 = read_csv( + StringIO(input), + parse_dates=["date"], + infer_datetime_format=True, + index_col="date", + ).index + tm.assert_index_equal(expected, res8) From 8969a8e7715b8e42733f555e4a94e00a9ec3cb68 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 19:46:16 +0100 Subject: [PATCH 24/26] check expected_inconsistent in tests --- pandas/tests/io/parser/test_parse_dates.py | 19 ++++++++----------- pandas/tests/tools/test_to_datetime.py | 19 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 575fce41c2562..41f0b661611a6 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1717,35 +1717,32 @@ def test_dayfirst_warnings(): # CASE 1: valid input input = "date\n31/12/2014\n10/03/2011" - expected = DatetimeIndex( + expected_consistent = DatetimeIndex( ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date" ) + expected_inconsistent = DatetimeIndex( + ["2014-12-31", "2011-10-03"], 
dtype="datetime64[ns]", freq=None, name="date" + ) # A. dayfirst arg correct, no warning res1 = read_csv( StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date" ).index - tm.assert_index_equal(expected, res1) + tm.assert_index_equal(expected_consistent, res1) # B. dayfirst arg incorrect, warning + incorrect output with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): res2 = read_csv( StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" ).index - with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( - UserWarning, match=warning_msg_day_first - ): - tm.assert_index_equal(expected, res2) + tm.assert_index_equal(expected_inconsistent, res2) # C. dayfirst default arg, same as B with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): res3 = read_csv( StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date" ).index - with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( - UserWarning, match=warning_msg_day_first - ): - tm.assert_index_equal(expected, res3) + tm.assert_index_equal(expected_inconsistent, res3) # D. 
infer_datetime_format=True overrides dayfirst default # no warning + correct result @@ -1755,7 +1752,7 @@ def test_dayfirst_warnings(): infer_datetime_format=True, index_col="date", ).index - tm.assert_index_equal(expected, res4) + tm.assert_index_equal(expected_consistent, res4) # CASE 2: invalid input # cannot consistently process with single format diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ac60237a99687..469a5caf7d694 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1852,34 +1852,31 @@ def test_dayfirst_warnings(self): # CASE 1: valid input arr = ["31/12/2014", "10/03/2011"] - expected = DatetimeIndex( + expected_consistent = DatetimeIndex( ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None ) + expected_inconsistent = DatetimeIndex( + ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None + ) # A. dayfirst arg correct, no warning res1 = to_datetime(arr, dayfirst=True) - tm.assert_index_equal(expected, res1) + tm.assert_index_equal(expected_consistent, res1) # B. dayfirst arg incorrect, warning + incorrect output with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): res2 = to_datetime(arr, dayfirst=False) - with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( - UserWarning, match=warning_msg_day_first - ): - tm.assert_index_equal(expected, res2) + tm.assert_index_equal(expected_inconsistent, res2) # C. dayfirst default arg, same as B with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first): res3 = to_datetime(arr, dayfirst=False) - with pytest.raises(AssertionError, match=None), tm.assert_produces_warning( - UserWarning, match=warning_msg_day_first - ): - tm.assert_index_equal(expected, res3) + tm.assert_index_equal(expected_inconsistent, res3) # D. 
infer_datetime_format=True overrides dayfirst default # no warning + correct result res4 = to_datetime(arr, infer_datetime_format=True) - tm.assert_index_equal(expected, res4) + tm.assert_index_equal(expected_consistent, res4) # CASE 2: invalid input # cannot consistently process with single format From b6cbb5df04742c6fb5d8ac3a80ad2156c314f6b9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 22 Aug 2021 19:50:50 +0100 Subject: [PATCH 25/26] fixup docs --- doc/source/whatsnew/v1.4.0.rst | 3 ++- pandas/core/tools/datetimes.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index e3ccd887dd677..fc488504f1fdf 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -115,7 +115,8 @@ The ``dayfirst`` option of :func:`to_datetime` isn't strict, and this can lead t pd.to_datetime(["31-12-2021"], dayfirst=False) -Now, a warning will be raised if a date string cannot be parsed accordance to the given ``dayfirst`` value. +Now, a warning will be raised if a date string cannot be parsed in accordance with the given ``dayfirst`` value when +the value is a delimited date string (e.g. ``31-12-2012``). .. _whatsnew_140.notable_bug_fixes.notable_bug_fix2: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 41b48b10765df..53c960db48f16 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -708,7 +708,7 @@ def to_datetime( with day first (this is a known bug, based on dateutil behavior). If a date string cannot be parsed in accordance with the given `dayfirst` option (e.g. ``to_datetime(['31-12-2021'], dayfirst=False)``) - then a warning will be shown. + then in the case of delimited date strings, a warning will be shown. yearfirst : bool, default False Specify a date parse order if `arg` is str or its list-likes.
From c768e1d5a722aa9512f4ee2e6db7225422d55551 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 26 Aug 2021 21:10:52 +0100 Subject: [PATCH 26/26] remove note about dateutil bug --- pandas/core/tools/datetimes.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 53c960db48f16..3005dd958ab49 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -705,10 +705,9 @@ def to_datetime( .. warning:: dayfirst=True is not strict, but will prefer to parse - with day first (this is a known bug, based on dateutil behavior). - If a date string cannot be parsed in accordance with the given - `dayfirst` option (e.g. ``to_datetime(['31-12-2021'], dayfirst=False)``) - then in the case of delimited date strings, a warning will be shown. + with day first. If a delimited date string cannot be parsed in + accordance with the given `dayfirst` option, e.g. + ``to_datetime(['31-12-2021'])``, then a warning will be shown. yearfirst : bool, default False Specify a date parse order if `arg` is str or its list-likes. @@ -721,7 +720,7 @@ def to_datetime( .. warning:: yearfirst=True is not strict, but will prefer to parse - with year first (this is a known bug, based on dateutil behavior). + with year first. utc : bool, default None Return UTC DatetimeIndex if True (converting any tz-aware