diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c6783e46faaee..3f898ca23bd6f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -120,6 +120,7 @@ Performance improvements - Performance improvement in :func:`read_parquet` and :meth:`DataFrame.to_parquet` when reading a remote file with ``engine="pyarrow"`` (:issue:`51609`) - Performance improvement in :meth:`MultiIndex.sortlevel` when ``ascending`` is a list (:issue:`51612`) - Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`) +- Performance improvement in :meth:`~arrays.ArrowExtensionArray.fillna` when array does not contain nulls (:issue:`51635`) - Performance improvement when parsing strings to ``boolean[pyarrow]`` dtype (:issue:`51730`) - Performance improvement when searching an :class:`Index` sliced from other indexes (:issue:`51738`) - Performance improvement in :meth:`Series.combine_first` (:issue:`51777`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 551b925f42579..468bc78a79dd5 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -766,6 +766,10 @@ def fillna( ) -> Self: value, method = validate_fillna_kwargs(value, method) + if not self._hasna: + # TODO(CoW): Not necessary anymore when CoW is the default + return self.copy() + if limit is not None: return super().fillna(value=value, method=method, limit=limit) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 35bd5d47b36dc..91c6b96767142 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -3,6 +3,7 @@ from pandas import ( NA, + ArrowDtype, DataFrame, Interval, NaT, @@ -286,6 +287,9 @@ def test_fillna_ea_noop_shares_memory( if using_copy_on_write: assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) assert not df2._mgr._has_no_reference(1) + elif isinstance(df.dtypes[0], ArrowDtype): + # arrow is immutable, so no-ops do not need to copy underlying array + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) else: assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) @@ -313,6 +317,9 @@ def test_fillna_inplace_ea_noop_shares_memory( assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) assert not df._mgr._has_no_reference(1) assert not view._mgr._has_no_reference(1) + elif isinstance(df.dtypes[0], ArrowDtype): + # arrow is immutable, so no-ops do not need to copy underlying array + assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) else: assert not np.shares_memory(get_array(df, "b"), get_array(view, "b")) df.iloc[0, 1] = 100 diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2378710555340..517626f8c2abb 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -691,8 +691,8 @@ def test_fillna_no_op_returns_copy(self, data): result = data.fillna(valid) assert result is not data self.assert_extension_array_equal(result, data) - with tm.assert_produces_warning(PerformanceWarning): - result = data.fillna(method="backfill") + + result = data.fillna(method="backfill") assert result is not data self.assert_extension_array_equal(result, data) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index cc8bd91ca3cb4..1f39e8e9b450e 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -165,10 +165,7 @@ def test_fillna_no_op_returns_copy(self, data): assert result is not data self.assert_extension_array_equal(result, data) - with tm.maybe_produces_warning( - PerformanceWarning, data.dtype.storage == "pyarrow" - ): - result = data.fillna(method="backfill") + result = data.fillna(method="backfill") assert result is not data self.assert_extension_array_equal(result, data)