diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 12d35288d1ee6..06746192532fd 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -28,6 +28,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
+- Added :meth:`DataFrame.downcast` and :meth:`Series.downcast` (:issue:`51641`)
 - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`)
 -
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8cd0ffadcc17c..2cc36eba92093 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5515,6 +5515,10 @@ def fillna(
             downcast=downcast,
         )
 
+    @doc(NDFrame.downcast, **_shared_doc_kwargs)
+    def downcast(self) -> DataFrame:
+        return super().downcast()
+
     def pop(self, item: Hashable) -> Series:
         """
         Return item and drop from frame. Raise KeyError if not found.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 821e41db6b065..aba8c88f39c3b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7012,6 +7012,52 @@ def fillna(
         else:
             return result.__finalize__(self, method="fillna")
 
+    def downcast(self: NDFrameT) -> NDFrameT:
+        """
+        Downcast the columns to an appropriate dtype.
+
+        Possibly casts floats to integers. The dtype is inferred.
+
+        Returns
+        -------
+        Series or DataFrame
+            Object with the same shape and converted columns.
+
+        Notes
+        -----
+        The downcasting logic protects against truncating floats.
+        If the values don't fit into the inferred dtype, the column is left unchanged.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"foo": [1.0, 2.0], "bar": [1.5, 2.5], "baz": [3.0, 4.0]})
+        >>> df
+           foo  bar  baz
+        0  1.0  1.5  3.0
+        1  2.0  2.5  4.0
+
+        >>> result = df.downcast()
+        >>> result
+           foo  bar  baz
+        0    1  1.5    3
+        1    2  2.5    4
+
+        >>> result.dtypes
+        foo      int64
+        bar    float64
+        baz      int64
+        dtype: object
+        """
+        # Under copy-on-write a shallow copy suffices; otherwise copy eagerly so
+        # that columns left unchanged by the downcast do not alias ``self``.
+        if using_copy_on_write():
+            result = self.copy(deep=False)
+        else:
+            result = self.copy(deep=True)
+        new_data = result._mgr.downcast("infer")
+        result = self._constructor(new_data)
+        return result.__finalize__(self, method="downcast")
+
     @overload
     def ffill(
         self: NDFrameT,
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 2de970466e19f..17d36179aff15 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -373,6 +373,9 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
         )
 
+    def downcast(self: T, dtype) -> T:
+        return self.apply_with_block("downcast", dtype=dtype)
+
     def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
         if copy is None:
             copy = True
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index e5b30b20a79cd..8d86c34f5b43d 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -421,6 +421,10 @@ def coerce_to_target_dtype(self, other) -> Block:
 
         return self.astype(new_dtype, copy=False)
 
+    @final
+    def downcast(self, dtype: str | DtypeObj, using_cow: bool = False) -> list[Block]:
+        return self._maybe_downcast([self], downcast=dtype, using_cow=using_cow)
+
     @final
     def _maybe_downcast(
         self, blocks: list[Block], downcast=None, using_cow: bool = False
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 500d79b1a0de7..65d3dabcebc19 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -438,6 +438,9 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
             using_cow=using_copy_on_write(),
         )
 
+    def downcast(self: T, dtype) -> T:
+        return self.apply("downcast", dtype=dtype, using_cow=using_copy_on_write())
+
     def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
         if copy is None:
             if using_copy_on_write():
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 03d7b25aca49a..3851af0d85d13 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -5081,6 +5081,10 @@ def fillna(
             downcast=downcast,
         )
 
+    @doc(NDFrame.downcast, **_shared_doc_kwargs)
+    def downcast(self) -> Series:
+        return super().downcast()
+
     def pop(self, item: Hashable) -> Any:
         """
         Return item and drops from series. Raise KeyError if not found.
diff --git a/pandas/tests/copy_view/test_downcast.py b/pandas/tests/copy_view/test_downcast.py
new file mode 100644
index 0000000000000..b741267d610ed
--- /dev/null
+++ b/pandas/tests/copy_view/test_downcast.py
@@ -0,0 +1,21 @@
+import numpy as np
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
+
+
+class TestDowncast:
+    def test_downcast(self, using_copy_on_write):
+        df = DataFrame({"a": [1.0, 2.0], "b": 1.5})
+        df_orig = df.copy()
+        result = df.downcast()
+
+        assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
+        if using_copy_on_write:
+            assert np.shares_memory(get_array(df, "b"), get_array(result, "b"))
+        else:
+            assert not np.shares_memory(get_array(df, "b"), get_array(result, "b"))
+
+        result.iloc[0, 1] = 100.5
+        tm.assert_frame_equal(df, df_orig)
diff --git a/pandas/tests/frame/methods/test_downcast.py b/pandas/tests/frame/methods/test_downcast.py
new file mode 100644
index 0000000000000..e1d60e76938e3
--- /dev/null
+++ b/pandas/tests/frame/methods/test_downcast.py
@@ -0,0 +1,10 @@
+from pandas import DataFrame
+import pandas._testing as tm
+
+
+class TestDowncast:
+    def test_downcast(self):
+        df = DataFrame({"a": [1.0, 2.0], "b": 1.5, "c": 2.0, "d": "a"})
+        result = df.downcast()
+        expected = DataFrame({"a": [1, 2], "b": 1.5, "c": 2, "d": "a"})
+        tm.assert_frame_equal(result, expected)