diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 070e47d73cfae..ae82bdf5395d1 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -675,6 +675,7 @@ Deprecations
 - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
 - Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
 - Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
+- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 94cffe8fb840d..d31d75e7d4398 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -219,6 +219,8 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
     elif isinstance(value, Timestamp):
         if value.tz is None:
             value = value.to_datetime64()
+        elif not isinstance(dtype, DatetimeTZDtype):
+            raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype")
     elif isinstance(value, Timedelta):
         value = value.to_timedelta64()
 
@@ -1616,9 +1618,21 @@ def maybe_cast_to_datetime(
                         # didn't specify one
 
                         if dta.tz is not None:
+                            warnings.warn(
+                                "Data is timezone-aware. Converting "
+                                "timezone-aware data to timezone-naive by "
+                                "passing dtype='datetime64[ns]' to "
+                                "DataFrame or Series is deprecated and will "
+                                "raise in a future version. Use "
+                                "`pd.Series(values).dt.tz_localize(None)` "
+                                "instead.",
+                                FutureWarning,
+                                stacklevel=8,
+                            )
                             # equiv: dta.view(dtype)
                             # Note: NOT equivalent to dta.astype(dtype)
                             dta = dta.tz_localize(None)
+
                         value = dta
                     elif is_datetime64tz:
                         dtype = cast(DatetimeTZDtype, dtype)
@@ -1810,7 +1824,7 @@ def construct_2d_arraylike_from_scalar(
     shape = (length, width)
 
     if dtype.kind in ["m", "M"]:
-        value = maybe_unbox_datetimelike(value, dtype)
+        value = maybe_unbox_datetimelike_tz_deprecation(value, dtype, stacklevel=4)
     elif dtype == object:
         if isinstance(value, (np.timedelta64, np.datetime64)):
             # calling np.array below would cast to pytimedelta/pydatetime
@@ -1873,7 +1887,7 @@ def construct_1d_arraylike_from_scalar(
             if not isna(value):
                 value = ensure_str(value)
         elif dtype.kind in ["M", "m"]:
-            value = maybe_unbox_datetimelike(value, dtype)
+            value = maybe_unbox_datetimelike_tz_deprecation(value, dtype)
 
         subarr = np.empty(length, dtype=dtype)
         subarr.fill(value)
@@ -1881,6 +1895,62 @@ def construct_1d_arraylike_from_scalar(
     return subarr
 
 
+def maybe_unbox_datetimelike_tz_deprecation(
+    value: Scalar, dtype: DtypeObj, stacklevel: int = 5
+):
+    """
+    Wrap maybe_unbox_datetimelike with a check for a timezone-aware Timestamp
+    along with a timezone-naive datetime64 dtype, which is deprecated.
+
+    Parameters
+    ----------
+    value : Scalar
+        Value to unbox.
+    dtype : DtypeObj
+        Target dtype. The caller is responsible for checking that
+        ``dtype.kind`` is "m" or "M".
+    stacklevel : int, default 5
+        Forwarded to ``warnings.warn`` when the deprecation warning is issued.
+
+    Returns
+    -------
+    Scalar
+        Result of ``maybe_unbox_datetimelike``. For the deprecated case this
+        is computed from ``value.tz_localize(None)``.
+
+    Raises
+    ------
+    TypeError
+        Re-raised from ``maybe_unbox_datetimelike`` unless ``value`` is a
+        timezone-aware Timestamp and ``dtype`` is a ``np.dtype``.
+    """
+    # Caller is responsible for checking dtype.kind in ["m", "M"]
+    try:
+        value = maybe_unbox_datetimelike(value, dtype)
+    except TypeError:
+        if (
+            isinstance(value, Timestamp)
+            and value.tz is not None
+            and isinstance(dtype, np.dtype)
+        ):
+            warnings.warn(
+                "Data is timezone-aware. Converting "
+                "timezone-aware data to timezone-naive by "
+                "passing dtype='datetime64[ns]' to "
+                "DataFrame or Series is deprecated and will "
+                "raise in a future version. Use "
+                "`pd.Series(values).dt.tz_localize(None)` "
+                "instead.",
+                FutureWarning,
+                stacklevel=stacklevel,
+            )
+            new_value = value.tz_localize(None)
+            return maybe_unbox_datetimelike(new_value, dtype)
+        else:
+            raise
+    return value
+
+
 def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
     """
     Transform any list-like object in a 1-dimensional numpy array of object
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 6e9991ff17ac3..a5dc37ef32735 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2404,6 +2404,17 @@ def test_from_series_with_name_with_columns(self):
         expected = DataFrame(columns=["bar"])
         tm.assert_frame_equal(result, expected)
 
+    def test_nested_list_columns(self):
+        # GH 14467
+        result = DataFrame(
+            [[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]]
+        )
+        expected = DataFrame(
+            [[1, 2, 3], [4, 5, 6]],
+            columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]),
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameConstructorWithDatetimeTZ:
     @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
@@ -2436,12 +2447,41 @@ def test_construction_preserves_tzaware_dtypes(self, tz):
         tm.assert_series_equal(result, expected)
 
     def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
-        # GH#25843
+        # GH#25843, GH#41555, GH#33401
         tz = tz_aware_fixture
-        result = DataFrame({"d": [Timestamp("2019", tz=tz)]}, dtype="datetime64[ns]")
-        expected = DataFrame({"d": [Timestamp("2019")]})
+        ts = Timestamp("2019", tz=tz)
+        ts_naive = Timestamp("2019")
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = DataFrame({0: [ts]}, dtype="datetime64[ns]")
+
+        expected = DataFrame({0: [ts_naive]})
+        tm.assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = DataFrame({0: ts}, index=[0], dtype="datetime64[ns]")
+        tm.assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = DataFrame([ts], dtype="datetime64[ns]")
+        tm.assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]")
+        tm.assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]")
+        tm.assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = DataFrame([Series([ts])], dtype="datetime64[ns]")
         tm.assert_frame_equal(result, expected)
 
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            df = DataFrame([[ts]], columns=[0], dtype="datetime64[ns]")
+        tm.assert_equal(df, expected)
+
     def test_from_dict(self):
 
         # 8260
@@ -2682,13 +2722,15 @@ def test_from_out_of_bounds_timedelta(self, constructor, cls):
 
         assert type(get1(result)) is cls
 
-    def test_nested_list_columns(self):
-        # GH 14467
-        result = DataFrame(
-            [[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]]
-        )
-        expected = DataFrame(
-            [[1, 2, 3], [4, 5, 6]],
-            columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]),
-        )
-        tm.assert_frame_equal(result, expected)
+    def test_tzaware_data_tznaive_dtype(self, constructor):
+        tz = "US/Eastern"
+        ts = Timestamp("2019", tz=tz)
+        ts_naive = Timestamp("2019")
+
+        with tm.assert_produces_warning(
+            FutureWarning, match="Data is timezone-aware", check_stacklevel=False
+        ):
+            result = constructor(ts, dtype="M8[ns]")
+
+        assert np.all(result.dtypes == "M8[ns]")
+        assert np.all(result == ts_naive)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index e74d900d1b04d..41c0cbf58e438 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -1536,10 +1536,26 @@ def test_constructor_tz_mixed_data(self):
         tm.assert_series_equal(result, expected)
 
     def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
-        # GH#25843
+        # GH#25843, GH#41555, GH#33401
         tz = tz_aware_fixture
-        result = Series([Timestamp("2019", tz=tz)], dtype="datetime64[ns]")
-        expected = Series([Timestamp("2019")])
+        ts = Timestamp("2019", tz=tz)
+        ts_naive = Timestamp("2019")
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = Series([ts], dtype="datetime64[ns]")
+        expected = Series([ts_naive])
+        tm.assert_series_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            result = Series(np.array([ts], dtype=object), dtype="datetime64[ns]")
+        tm.assert_series_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = Series({0: ts}, dtype="datetime64[ns]")
+        tm.assert_series_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = Series(ts, index=[0], dtype="datetime64[ns]")
         tm.assert_series_equal(result, expected)
 
     def test_constructor_datetime64(self):