From a5b615183cf20e3843c8fb186b874b46d41faeff Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 9 Nov 2021 09:48:56 -0800 Subject: [PATCH] CLN: split/fixturize to_datetime tests --- pandas/tests/tools/test_to_datetime.py | 388 ++++++++++++------------- 1 file changed, 186 insertions(+), 202 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 1f75bc11005bc..4867ba58838ef 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -46,6 +46,14 @@ from pandas.core.tools.datetimes import start_caching_at +@pytest.fixture(params=[True, False]) +def cache(request): + """ + cache keyword to pass to to_datetime. + """ + return request.param + + class TestTimeConversionFormats: @pytest.mark.parametrize("readonly", [True, False]) def test_to_datetime_readonly(self, readonly): @@ -57,7 +65,6 @@ def test_to_datetime_readonly(self, readonly): expected = to_datetime([]) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_format(self, cache): values = ["1/1/2000", "1/2/2000", "1/3/2000"] @@ -82,7 +89,6 @@ def test_to_datetime_format(self, cache): else: tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_format_YYYYMMDD(self, cache): s = Series([19801222, 19801222] + [19810105] * 5) expected = Series([Timestamp(x) for x in s.apply(str)]) @@ -109,17 +115,18 @@ def test_to_datetime_format_YYYYMMDD(self, cache): result = to_datetime(s, format="%Y%m%d", cache=cache) tm.assert_series_equal(result, expected) + def test_to_datetime_format_YYYYMMDD_coercion(self, cache): # coercion # GH 7930 - s = Series([20121231, 20141231, 99991231]) - result = to_datetime(s, format="%Y%m%d", errors="ignore", cache=cache) + ser = Series([20121231, 20141231, 99991231]) + result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache) expected = Series( [datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], dtype=object, ) tm.assert_series_equal(result, expected) - result = to_datetime(s, format="%Y%m%d", errors="coerce", cache=cache) + result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache) expected = Series(["20121231", "20141231", "NaT"], dtype="M8[ns]") tm.assert_series_equal(result, expected) @@ -199,7 +206,6 @@ def test_to_datetime_with_NA(self, data, format, expected): result = to_datetime(data, format=format) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_format_integer(self, cache): # GH 10178 s = Series([2000, 2001, 2002]) @@ -236,7 +242,6 @@ def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, expected): result = to_datetime(int_date, format="%Y%m%d", errors="ignore") assert result == expected - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_format_microsecond(self, cache): # these are locale dependent @@ -249,7 +254,6 @@ def test_to_datetime_format_microsecond(self, cache): exp = datetime.strptime(val, format) assert result == exp - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_format_time(self, cache): data = [ ["01/10/2010 15:20", "%m/%d/%Y %H:%M", Timestamp("2010-01-10 15:20")], @@ -259,6 +263,7 @@ def test_to_datetime_format_time(self, cache): "%m/%d/%Y %H:%M:%S", Timestamp("2010-01-10 13:56:01"), ] # , + # FIXME: don't leave commented-out # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', # Timestamp('2010-01-10 20:14')], # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', @@ -270,7 +275,6 @@ def test_to_datetime_format_time(self, cache): assert to_datetime(s, format=format, cache=cache) == dt @td.skip_if_has_locale - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_with_non_exact(self, cache): # GH 10834 # 8904 @@ -284,7 +288,6 @@ def test_to_datetime_with_non_exact(self, cache): ) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_parse_nanoseconds_with_formula(self, cache): # GH8989 @@ -300,14 +303,15 @@ def test_parse_nanoseconds_with_formula(self, cache): result = to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f", cache=cache) assert result == expected - @pytest.mark.parametrize("cache", [True, False]) - def test_to_datetime_format_weeks(self, cache): - data = [ + @pytest.mark.parametrize( + "value,fmt,expected", + [ ["2009324", "%Y%W%w", Timestamp("2009-08-13")], ["2013020", "%Y%U%w", Timestamp("2013-01-13")], - ] - for s, format, dt in data: - assert to_datetime(s, format=format, cache=cache) == dt + ], + ) + def test_to_datetime_format_weeks(self, value, fmt, expected, cache): + assert to_datetime(value, format=fmt, cache=cache) == expected @pytest.mark.parametrize( "fmt,dates,expected_dates", @@ -601,7 +605,6 @@ def test_to_datetime_today_now_unicode_bytes(self): to_datetime(["now"]) to_datetime(["today"]) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_dt64s(self, cache): in_bound_dts = [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")] @@ -611,7 +614,6 @@ def test_to_datetime_dt64s(self, cache): @pytest.mark.parametrize( "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")] ) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): msg = f"Out of bounds nanosecond timestamp: {dt}" with pytest.raises(OutOfBoundsDatetime, match=msg): @@ -620,7 +622,6 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): Timestamp(dt) assert to_datetime(dt, errors="coerce", cache=cache) is NaT - @pytest.mark.parametrize("cache", [True, False]) @pytest.mark.parametrize("unit", ["s", "D"]) def test_to_datetime_array_of_dt64s(self, cache, unit): # https://github.com/pandas-dev/pandas/issues/31491 @@ -659,7 +660,6 @@ def test_to_datetime_array_of_dt64s(self, cache, unit): Index([dt.item() for dt in dts_with_oob]), ) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_tz(self, cache): # xref 8260 @@ -686,7 +686,6 @@ def test_to_datetime_tz(self, cache): with pytest.raises(ValueError, match=msg): to_datetime(arr, cache=cache) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_different_offsets(self, cache): # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark # see GH-26097 for more @@ -697,7 +696,6 @@ def test_to_datetime_different_offsets(self, cache): result = to_datetime(arr, cache=cache) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_tz_pytz(self, cache): # see gh-8260 us_eastern = pytz.timezone("US/Eastern") @@ -720,19 +718,16 @@ def test_to_datetime_tz_pytz(self, cache): ) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) @pytest.mark.parametrize( - "init_constructor, end_constructor, test_method", + "init_constructor, end_constructor", [ - (Index, DatetimeIndex, tm.assert_index_equal), - (list, DatetimeIndex, tm.assert_index_equal), - (np.array, DatetimeIndex, tm.assert_index_equal), - (Series, Series, tm.assert_series_equal), + (Index, DatetimeIndex), + (list, DatetimeIndex), + (np.array, DatetimeIndex), + (Series, Series), ], ) - def test_to_datetime_utc_true( - self, cache, init_constructor, end_constructor, test_method - ): + def test_to_datetime_utc_true(self, cache, init_constructor, end_constructor): # See gh-11934 & gh-6415 data = ["20100102 121314", "20100102 121315"] expected_data = [ @@ -744,14 +739,13 @@ def test_to_datetime_utc_true( init_constructor(data), format="%Y%m%d %H%M%S", utc=True, cache=cache ) expected = end_constructor(expected_data) - test_method(result, expected) + tm.assert_equal(result, expected) # Test scalar case as well for scalar, expected in zip(data, expected_data): result = to_datetime(scalar, format="%Y%m%d %H%M%S", utc=True, cache=cache) assert result == expected - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_utc_true_with_series_single_value(self, cache): # GH 15760 UTC=True with Series ts = 1.5e18 @@ -759,7 +753,6 @@ def test_to_datetime_utc_true_with_series_single_value(self, cache): expected = Series([Timestamp(ts, tz="utc")]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): ts = "2013-01-01 00:00:00-01:00" expected_ts = "2013-01-01 01:00:00" @@ -768,7 +761,6 @@ def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): expected = Series([Timestamp(expected_ts, tz="utc")] * 3) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) @pytest.mark.parametrize( "date, dtype", [ @@ -781,7 +773,6 @@ def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, dtype): result = to_datetime(Series([date], dtype=dtype), utc=True, cache=cache) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) @td.skip_if_no("psycopg2") def test_to_datetime_tz_psycopg2(self, cache): @@ -822,7 +813,6 @@ def test_to_datetime_tz_psycopg2(self, cache): expected = DatetimeIndex(["2000-01-01 13:00:00"], dtype="datetime64[ns, UTC]") tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_datetime_bool(self, cache): # GH13176 msg = r"dtype bool cannot be converted to datetime64\[ns\]" @@ -945,18 +935,6 @@ def test_to_datetime_cache(self, utc, format, constructor): tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( - "listlike", - [ - (deque([Timestamp("2010-06-02 09:30:00")] * 51)), - ([Timestamp("2010-06-02 09:30:00")] * 51), - (tuple([Timestamp("2010-06-02 09:30:00")] * 51)), - ], - ) - def test_no_slicing_errors_in_should_cache(self, listlike): - # GH 29403 - assert tools.should_cache(listlike) is True - def test_to_datetime_from_deque(self): # GH 29403 result = to_datetime(deque([Timestamp("2010-06-02 09:30:00")] * 51)) @@ -1198,7 +1176,6 @@ def test_to_datetime_fixed_offset(self): class TestToDatetimeUnit: - @pytest.mark.parametrize("cache", [True, False]) def test_unit(self, cache): # GH 11758 # test proper behavior with errors @@ -1247,17 +1224,19 @@ def test_unit(self, cache): with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(values, errors="raise", unit="s", cache=cache) + def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache): # if we have a string, then we raise a ValueError # and NOT an OutOfBoundsDatetime - for val in ["foo", Timestamp("20130101")]: - try: - to_datetime(val, errors="raise", unit="s", cache=cache) - except OutOfBoundsDatetime as err: - raise AssertionError("incorrect exception raised") from err - except ValueError: - pass - - @pytest.mark.parametrize("cache", [True, False]) + + try: + to_datetime("foo", errors="raise", unit="s", cache=cache) + except OutOfBoundsDatetime as err: + raise AssertionError("incorrect exception raised") from err + except ValueError: + pass + else: + assert False, "Failed to raise ValueError" + def test_unit_consistency(self, cache): # consistency of conversions @@ -1274,7 +1253,6 @@ def test_unit_consistency(self, cache): assert result == expected assert isinstance(result, Timestamp) - @pytest.mark.parametrize("cache", [True, False]) def test_unit_with_numeric(self, cache): # GH 13180 @@ -1303,7 +1281,6 @@ def test_unit_with_numeric(self, cache): result = to_datetime(arr, errors="coerce", cache=cache) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_unit_mixed(self, cache): # mixed integers/datetimes @@ -1324,7 +1301,6 @@ def test_unit_mixed(self, cache): with pytest.raises(ValueError, match=msg): to_datetime(arr, errors="raise", cache=cache) - @pytest.mark.parametrize("cache", [True, False]) def test_unit_rounding(self, cache): # GH 14156 & GH 20445: argument will incur floating point errors # but no premature rounding @@ -1332,17 +1308,105 @@ def test_unit_rounding(self, cache): expected = Timestamp("2015-06-19 19:55:31.877000192") assert result == expected - @pytest.mark.parametrize("cache", [True, False]) def test_unit_ignore_keeps_name(self, cache): # GH 21697 expected = Index([15e9] * 2, name="name") result = to_datetime(expected, errors="ignore", unit="s", cache=cache) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) - def test_dataframe(self, cache): + def test_to_datetime_errors_ignore_utc_true(self): + # GH#23758 + result = to_datetime([1], unit="s", utc=True, errors="ignore") + expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC") + tm.assert_index_equal(result, expected) + + # TODO: this is moved from tests.series.test_timeseries, may be redundant + def test_to_datetime_unit(self): + + epoch = 1370745748 + s1 = Series([epoch + t for t in range(20)]) + s2 = Series([epoch + t for t in range(20)]).astype(float) + + for ser in [s1, s2]: + result = to_datetime(ser, unit="s") + expected = Series( + [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) + for t in range(20) + ] + ) + tm.assert_series_equal(result, expected) + + s1 = Series([epoch + t for t in range(20)] + [iNaT]) + s2 = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) + s3 = Series([epoch + t for t in range(20)] + [np.nan]) + + for ser in [s1, s2, s3]: + result = to_datetime(ser, unit="s") + expected = Series( + [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) + for t in range(20) + ] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + def test_to_datetime_unit_fractional_seconds(self): + + # GH13834 + epoch = 1370745748 + s = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) + result = to_datetime(s, unit="s") + expected = Series( + [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) + for t in np.arange(0, 2, 0.25) + ] + + [NaT] + ) + # GH20455 argument will incur floating point errors but no premature rounding + result = result.round("ms") + tm.assert_series_equal(result, expected) + + def test_to_datetime_unit_na_values(self): + result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D") + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3 + ) + tm.assert_index_equal(result, expected) + + def test_to_datetime_unit_invalid(self): + msg = "non convertible value foo with the unit 'D'" + with pytest.raises(ValueError, match=msg): + to_datetime([1, 2, "foo"], unit="D") + msg = "cannot convert input 111111111 with the unit 'D'" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime([1, 2, 111111111], unit="D") + + def test_to_timestamp_unit_coerce(self): + # coerce we can process + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1 + ) + result = to_datetime([1, 2, "foo"], unit="D", errors="coerce") + tm.assert_index_equal(result, expected) - df = DataFrame( + result = to_datetime([1, 2, 111111111], unit="D", errors="coerce") + tm.assert_index_equal(result, expected) + + +class TestToDatetimeDataFrame: + @pytest.fixture(params=[True, False]) + def cache(self, request): + """ + cache keyword to pass to to_datetime. + """ + return request.param + + @pytest.fixture + def df(self): + return DataFrame( { "year": [2015, 2016], "month": [2, 3], @@ -1356,6 +1420,8 @@ def test_dataframe(self, cache): } ) + def test_dataframe(self, df, cache): + result = to_datetime( {"year": df["year"], "month": df["month"], "day": df["day"]}, cache=cache ) @@ -1377,6 +1443,7 @@ def test_dataframe(self, cache): ) tm.assert_series_equal(result, expected2) + def test_dataframe_field_aliases_column_subset(self, df, cache): # unit mappings units = [ { @@ -1404,6 +1471,7 @@ def test_dataframe(self, cache): ) tm.assert_series_equal(result, expected) + def test_dataframe_field_aliases(self, df, cache): d = { "year": "year", "month": "month", @@ -1425,10 +1493,18 @@ def test_dataframe(self, cache): ) tm.assert_series_equal(result, expected) + def test_dataframe_str_dtype(self, df, cache): # coerce back to int result = to_datetime(df.astype(str), cache=cache) + expected = Series( + [ + Timestamp("20150204 06:58:10.001002003"), + Timestamp("20160305 07:59:11.001002003"), + ] + ) tm.assert_series_equal(result, expected) + def test_dataframe_coerce(self, cache): # passing coerce df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) @@ -1438,10 +1514,12 @@ def test_dataframe(self, cache): ) with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) + result = to_datetime(df2, errors="coerce", cache=cache) expected = Series([Timestamp("20150204 00:00:00"), NaT]) tm.assert_series_equal(result, expected) + def test_dataframe_extra_keys_raisesm(self, df, cache): # extra columns msg = r"extra keys have been passed to the datetime assemblage: \[foo\]" with pytest.raises(ValueError, match=msg): @@ -1449,6 +1527,7 @@ def test_dataframe(self, cache): df2["foo"] = 1 to_datetime(df2, cache=cache) + def test_dataframe_missing_keys_raises(self, df, cache): # not enough msg = ( r"to assemble mappings requires at least that \[year, month, " @@ -1464,6 +1543,7 @@ def test_dataframe(self, cache): with pytest.raises(ValueError, match=msg): to_datetime(df[c], cache=cache) + def test_dataframe_duplicate_columns_raises(self, cache): # duplicates msg = "cannot assemble with duplicate keys" df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) @@ -1478,9 +1558,8 @@ def test_dataframe(self, cache): with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) - @pytest.mark.parametrize("cache", [True, False]) def test_dataframe_dtypes(self, cache): - # #13451 + # GH#13451 df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) # int16 @@ -1506,7 +1585,7 @@ def test_dataframe_dtypes(self, cache): to_datetime(df, cache=cache) def test_dataframe_utc_true(self): - # GH 23760 + # GH#23760 df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) result = to_datetime(df, utc=True) expected = Series( @@ -1514,94 +1593,6 @@ def test_dataframe_utc_true(self): ).dt.tz_localize("UTC") tm.assert_series_equal(result, expected) - def test_to_datetime_errors_ignore_utc_true(self): - # GH 23758 - result = to_datetime([1], unit="s", utc=True, errors="ignore") - expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC") - tm.assert_index_equal(result, expected) - - # TODO: this is moved from tests.series.test_timeseries, may be redundant - def test_to_datetime_unit(self): - - epoch = 1370745748 - s = Series([epoch + t for t in range(20)]) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - ) - tm.assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)]).astype(float) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - ) - tm.assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - + [NaT] - ) - tm.assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - + [NaT] - ) - tm.assert_series_equal(result, expected) - - # GH13834 - s = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) - result = to_datetime(s, unit="s") - expected = Series( - [ - Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) - for t in np.arange(0, 2, 0.25) - ] - + [NaT] - ) - # GH20455 argument will incur floating point errors but no premature rounding - result = result.round("ms") - tm.assert_series_equal(result, expected) - - s = pd.concat( - [Series([epoch + t for t in range(20)]).astype(float), Series([np.nan])], - ignore_index=True, - ) - result = to_datetime(s, unit="s") - expected = Series( - [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] - + [NaT] - ) - tm.assert_series_equal(result, expected) - - result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D") - expected = DatetimeIndex( - [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3 - ) - tm.assert_index_equal(result, expected) - - msg = "non convertible value foo with the unit 'D'" - with pytest.raises(ValueError, match=msg): - to_datetime([1, 2, "foo"], unit="D") - msg = "cannot convert input 111111111 with the unit 'D'" - with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime([1, 2, 111111111], unit="D") - - # coerce we can process - expected = DatetimeIndex( - [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1 - ) - result = to_datetime([1, 2, "foo"], unit="D", errors="coerce") - tm.assert_index_equal(result, expected) - - result = to_datetime([1, 2, 111111111], unit="D", errors="coerce") - tm.assert_index_equal(result, expected) - class TestToDatetimeMisc: def test_to_datetime_barely_out_of_bounds(self): @@ -1614,7 +1605,6 @@ def test_to_datetime_barely_out_of_bounds(self): with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(arr) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_iso8601(self, cache): result = to_datetime(["2012-01-01 00:00:00"], cache=cache) exp = Timestamp("2012-01-01 00:00:00") @@ -1624,19 +1614,17 @@ def test_to_datetime_iso8601(self, cache): exp = Timestamp("2012-10-01") assert result[0] == exp - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_default(self, cache): rs = to_datetime("2001", cache=cache) xp = datetime(2001, 1, 1) assert rs == xp # dayfirst is essentially broken - + # FIXME: don't leave commented-out # to_datetime('01-13-2012', dayfirst=True) # pytest.raises(ValueError, to_datetime('01-13-2012', # dayfirst=True)) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_on_datetime64_series(self, cache): # #2699 s = Series(date_range("1/1/2000", periods=10)) @@ -1644,7 +1632,6 @@ def test_to_datetime_on_datetime64_series(self, cache): result = to_datetime(s, cache=cache) assert result[0] == s[0] - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_with_space_in_series(self, cache): # GH 6428 s = Series(["10/18/2006", "10/18/2008", " "]) @@ -1658,7 +1645,6 @@ def test_to_datetime_with_space_in_series(self, cache): tm.assert_series_equal(result_ignore, s) @td.skip_if_has_locale - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_with_apply(self, cache): # this is only locale tested with US/None locales # GH 5195 @@ -1681,7 +1667,6 @@ def test_to_datetime_with_apply(self, cache): ) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_types(self, cache): # empty string @@ -1701,18 +1686,19 @@ def test_to_datetime_types(self, cache): result = to_datetime("2012", cache=cache) assert result == expected + # FIXME: don't leave commented-out # array = ['2012','20120101','20120101 12:01:01'] array = ["20120101", "20120101 12:01:01"] expected = list(to_datetime(array, cache=cache)) result = [Timestamp(date_str) for date_str in array] tm.assert_almost_equal(result, expected) + # FIXME: don't leave commented-out # currently fails ### # result = Timestamp('2012') # expected = to_datetime('2012') # assert result == expected - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_unprocessable_input(self, cache): # GH 4928 # GH 21864 @@ -1724,7 +1710,6 @@ def test_to_datetime_unprocessable_input(self, cache): with pytest.raises(TypeError, match=msg): to_datetime([1, "1"], errors="raise", cache=cache) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_unhashable_input(self, cache): series = Series([["a"]] * 100) result = to_datetime(series, errors="ignore", cache=cache) @@ -1765,7 +1750,6 @@ def test_to_datetime_overflow(self): with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) - @pytest.mark.parametrize("cache", [True, False]) def test_string_na_nat_conversion(self, cache): # GH #999, #858 @@ -1846,7 +1830,6 @@ def test_string_na_nat_conversion(self, cache): "datetime64[ns]", ], ) - @pytest.mark.parametrize("cache", [True, False]) def test_dti_constructor_numpy_timeunits(self, cache, dtype): # GH 9114 base = to_datetime(["2000-01-01T00:00", "2000-01-02T00:00", "NaT"], cache=cache) @@ -1856,7 +1839,6 @@ def test_dti_constructor_numpy_timeunits(self, cache, dtype): tm.assert_index_equal(DatetimeIndex(values), base) tm.assert_index_equal(to_datetime(values, cache=cache), base) - @pytest.mark.parametrize("cache", [True, False]) def test_dayfirst(self, cache): # GH 5917 arr = ["10/02/2014", "11/02/2014", "12/02/2014"] @@ -1980,7 +1962,6 @@ def test_guess_datetime_format_for_array(self): class TestToDatetimeInferFormat: - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_infer_datetime_format_consistent_format(self, cache): s = Series(date_range("20000101", periods=50, freq="H")) @@ -2002,7 +1983,6 @@ def test_to_datetime_infer_datetime_format_consistent_format(self, cache): tm.assert_series_equal(with_format, no_infer) tm.assert_series_equal(no_infer, yes_infer) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_infer_datetime_format_inconsistent_format(self, cache): s = Series( np.array( @@ -2024,7 +2004,6 @@ def test_to_datetime_infer_datetime_format_inconsistent_format(self, cache): to_datetime(s, infer_datetime_format=True, cache=cache), ) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): s = Series( np.array( @@ -2037,7 +2016,6 @@ def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): to_datetime(s, infer_datetime_format=True, cache=cache), ) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache): s = Series( np.array( @@ -2086,7 +2064,6 @@ def test_infer_datetime_format_zero_tz(self, ts, zero_tz, is_utc): expected = Series([Timestamp(ts, tz=tz)]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_iso8601_noleading_0s(self, cache): # GH 11871 s = Series(["2014-1-1", "2014-2-2", "2015-3-3"]) @@ -2104,7 +2081,6 @@ def test_to_datetime_iso8601_noleading_0s(self, cache): class TestDaysInMonth: # tests for issue #10154 - @pytest.mark.parametrize("cache", [True, False]) def test_day_not_in_month_coerce(self, cache): assert isna(to_datetime("2015-02-29", errors="coerce", cache=cache)) assert isna( @@ -2117,7 +2093,6 @@ def test_day_not_in_month_coerce(self, cache): to_datetime("2015-04-31", format="%Y-%m-%d", errors="coerce", cache=cache) ) - @pytest.mark.parametrize("cache", [True, False]) def test_day_not_in_month_raise(self, cache): msg = "day is out of range for month" with pytest.raises(ValueError, match=msg): @@ -2135,7 +2110,6 @@ def test_day_not_in_month_raise(self, cache): with pytest.raises(ValueError, match=msg): to_datetime("2015-04-31", errors="raise", format="%Y-%m-%d", cache=cache) - @pytest.mark.parametrize("cache", [True, False]) def test_day_not_in_month_ignore(self, cache): assert to_datetime("2015-02-29", errors="ignore", cache=cache) == "2015-02-29" assert ( @@ -2205,7 +2179,6 @@ class TestDatetimeParsingWrappers: }.items() ), ) - @pytest.mark.parametrize("cache", [True, False]) def test_parsers(self, date_str, expected, cache): # dateutil >= 2.5.0 defaults to yearfirst=True @@ -2237,7 +2210,6 @@ def test_parsers(self, date_str, expected, cache): result7 = date_range(date_str, freq="S", periods=1, yearfirst=yearfirst) assert result7 == expected - @pytest.mark.parametrize("cache", [True, False]) def test_na_values_with_cache( self, cache, unique_nulls_fixture, unique_nulls_fixture2 ): @@ -2257,7 +2229,6 @@ def test_parsers_nat(self): assert result3 is NaT assert result4 is NaT - @pytest.mark.parametrize("cache", [True, False]) def test_parsers_dayfirst_yearfirst(self, cache): # OK # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 @@ -2345,7 +2316,6 @@ def test_parsers_dayfirst_yearfirst(self, cache): assert result3 == expected assert result4 == expected - @pytest.mark.parametrize("cache", [True, False]) def test_parsers_timestring(self, cache): # must be the same as dateutil result cases = { @@ -2368,7 +2338,6 @@ def test_parsers_timestring(self, cache): assert result4 == exp_now assert result5 == exp_now - @pytest.mark.parametrize("cache", [True, False]) @pytest.mark.parametrize( "dt_string, tz, dt_string_repr", [ @@ -2564,29 +2533,44 @@ def test_arg_tz_ns_unit(self, offset, utc, exp): tm.assert_index_equal(result, expected) -@pytest.mark.parametrize( - "listlike,do_caching", - [([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], False), ([1, 1, 1, 1, 4, 5, 6, 7, 8, 9], True)], -) -def test_should_cache(listlike, do_caching): - assert ( - tools.should_cache(listlike, check_count=len(listlike), unique_share=0.7) - == do_caching +class TestShouldCache: + @pytest.mark.parametrize( + "listlike,do_caching", + [ + ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], False), + ([1, 1, 1, 1, 4, 5, 6, 7, 8, 9], True), + ], ) + def test_should_cache(self, listlike, do_caching): + assert ( + tools.should_cache(listlike, check_count=len(listlike), unique_share=0.7) + == do_caching + ) + @pytest.mark.parametrize( + "unique_share,check_count, err_message", + [ + (0.5, 11, r"check_count must be in next bounds: \[0; len\(arg\)\]"), + (10, 2, r"unique_share must be in next bounds: \(0; 1\)"), + ], + ) + def test_should_cache_errors(self, unique_share, check_count, err_message): + arg = [5] * 10 -@pytest.mark.parametrize( - "unique_share,check_count, err_message", - [ - (0.5, 11, r"check_count must be in next bounds: \[0; len\(arg\)\]"), - (10, 2, r"unique_share must be in next bounds: \(0; 1\)"), - ], -) -def test_should_cache_errors(unique_share, check_count, err_message): - arg = [5] * 10 + with pytest.raises(AssertionError, match=err_message): + tools.should_cache(arg, unique_share, check_count) - with pytest.raises(AssertionError, match=err_message): - tools.should_cache(arg, unique_share, check_count) + @pytest.mark.parametrize( + "listlike", + [ + (deque([Timestamp("2010-06-02 09:30:00")] * 51)), + ([Timestamp("2010-06-02 09:30:00")] * 51), + (tuple([Timestamp("2010-06-02 09:30:00")] * 51)), + ], + ) + def test_no_slicing_errors_in_should_cache(self, listlike): + # GH#29403 + assert tools.should_cache(listlike) is True def test_nullable_integer_to_datetime(): @@ -2624,7 +2608,7 @@ def test_na_to_datetime(nulls_fixture, klass): assert result[0] is NaT -def test_empty_string_datetime_coerce__format(): +def test_empty_string_datetime_coerce_format(): # GH13044 td = Series(["03/24/2016", "03/25/2016", ""]) format = "%m/%d/%Y"