From 9021944f86ac0534937711dc308c162f1f522f15 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 22 Feb 2023 21:56:50 +0000 Subject: [PATCH 1/4] BUG: transpose inferring dtype for dt in object column --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/frame.py | 10 ++++- pandas/tests/frame/methods/test_transpose.py | 45 +++++++++++++++++++- pandas/tests/groupby/test_quantile.py | 2 +- 4 files changed, 53 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index eff79dda821a0..1c49ed978511f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1386,6 +1386,7 @@ Reshaping - Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`) - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`) - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`) +- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`) - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would ignore arguments when passed a list of functions (:issue:`50863`) Sparse diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8c10e1fdd2d43..a28776653cc3b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3549,7 +3549,11 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: new_vals = new_vals.copy() result = self._constructor( - new_vals, index=self.columns, columns=self.index, copy=False + new_vals, + index=self.columns, + columns=self.index, + copy=False, + dtype=new_vals.dtype, ) if using_copy_on_write() and len(self) > 0: result._mgr.add_references(self._mgr) # type: ignore[arg-type] @@ -3571,7 +3575,9 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: new_arr = self.values.T if copy: new_arr = new_arr.copy() - result = self._constructor(new_arr, index=self.columns, columns=self.index) + result = self._constructor( + new_arr, index=self.columns, columns=self.index, dtype=new_arr.dtype + ) return result.__finalize__(self, method="transpose") diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 6213a6dbbd0ca..23a7504fa71ba 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -7,6 +7,8 @@ DataFrame, DatetimeIndex, IntervalIndex, + Series, + Timestamp, date_range, timedelta_range, ) @@ -63,7 +65,7 @@ def test_transpose_tzaware_2col_mixed_tz(self): df4 = DataFrame({"A": dti, "B": dti2}) assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() assert (df4.T.dtypes == object).all() - tm.assert_frame_equal(df4.T.T, df4) + tm.assert_frame_equal(df4.T.T, df4, check_dtype=False) @pytest.mark.parametrize("tz", [None, "America/New_York"]) def test_transpose_preserves_dtindex_equality_with_dst(self, tz): @@ -83,7 +85,7 @@ def test_transpose_object_to_tzaware_mixed_tz(self): df2 = DataFrame([dti, dti2]) assert (df2.dtypes == object).all() res2 = df2.T - assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() + assert (res2.dtypes == object).all() def test_transpose_uint64(self, uint64_frame): result = uint64_frame.T @@ -128,3 +130,42 @@ def test_transpose_get_view_dt64tzget_view(self): rtrip = result._mgr.blocks[0].values assert np.shares_memory(arr._ndarray, rtrip._ndarray) + + def test_transpose_not_inferring_dt(self): + # GH#51546 + df = DataFrame( + { + "a": [Timestamp("2019-12-31"), Timestamp("2019-12-31")], + }, + dtype=object, + ) + result = df.T + expected = DataFrame( + [[Timestamp("2019-12-31"), Timestamp("2019-12-31")]], + columns=[0, 1], + index=["a"], + dtype=object, + ) + tm.assert_frame_equal(result, expected) + + def test_transpose_not_inferring_dt_mixed_blocks(self): + # GH#51546 + df = DataFrame( + { + "a": Series( + [Timestamp("2019-12-31"), Timestamp("2019-12-31")], dtype=object + ), + "b": [Timestamp("2019-12-31"), Timestamp("2019-12-31")], + } + ) + result = df.T + expected = DataFrame( + [ + [Timestamp("2019-12-31"), Timestamp("2019-12-31")], + [Timestamp("2019-12-31"), Timestamp("2019-12-31")], + ], + columns=[0, 1], + index=["a", "b"], + dtype=object, + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 79354e550d3f6..cce59a6742181 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -464,7 +464,7 @@ def test_groupby_quantile_dt64tz_period(): # Check that we match the group-by-group result exp = {i: df.iloc[i::5].quantile(0.5) for i in range(5)} - expected = DataFrame(exp).T + expected = DataFrame([x.tolist() for x in exp.values()], index=exp.keys()) expected.index = expected.index.astype(np.int_) tm.assert_frame_equal(result, expected) From 0cf4e4c9255f9cb77088cb1f102558b164be5854 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 22 Feb 2023 23:26:30 +0000 Subject: [PATCH 2/4] Address review --- pandas/conftest.py | 1 + pandas/tests/frame/methods/test_transpose.py | 2 +- pandas/tests/groupby/test_quantile.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index aab6de1724677..a1ad1abc65dff 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1919,6 +1919,7 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ + pd.options.mode.copy_on_write = True return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block" diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 23a7504fa71ba..e8710cea95219 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -65,7 +65,7 @@ def test_transpose_tzaware_2col_mixed_tz(self): df4 = DataFrame({"A": dti, "B": dti2}) assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() assert (df4.T.dtypes == object).all() - tm.assert_frame_equal(df4.T.T, df4, check_dtype=False) + tm.assert_frame_equal(df4.T.T, df4.astype(object)) @pytest.mark.parametrize("tz", [None, "America/New_York"]) def test_transpose_preserves_dtindex_equality_with_dst(self, tz): diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index cce59a6742181..71608358c6d46 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -464,7 +464,7 @@ def test_groupby_quantile_dt64tz_period(): # Check that we match the group-by-group result exp = {i: df.iloc[i::5].quantile(0.5) for i in range(5)} - expected = DataFrame([x.tolist() for x in exp.values()], index=exp.keys()) + expected = DataFrame(exp).T.infer_objects() expected.index = expected.index.astype(np.int_) tm.assert_frame_equal(result, expected) From 3e8dc57bddd553df307619dab7b889eac393d18f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 22 Feb 2023 23:26:55 +0000 Subject: [PATCH 3/4] Move whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 - doc/source/whatsnew/v2.1.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1c49ed978511f..eff79dda821a0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1386,7 +1386,6 @@ Reshaping - Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`) - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`) - Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`) -- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`) - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` would ignore arguments when passed a list of functions (:issue:`50863`) Sparse diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index aeaafbc4c125d..b1cd2b2a12239 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -186,7 +186,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`) - Sparse From e2edec6b88b763282f837da773f417caa54b39e0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 27 Feb 2023 02:11:47 +0100 Subject: [PATCH 4/4] Clean --- pandas/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index a1ad1abc65dff..aab6de1724677 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1919,7 +1919,6 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - pd.options.mode.copy_on_write = True return pd.options.mode.copy_on_write and pd.options.mode.data_manager == "block"