Skip to content

PERF: ArrowExtensionArray.fillna when array does not contain any nulls #51635

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 17, 2023
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ Performance improvements
- Performance improvement in :func:`read_parquet` and :meth:`DataFrame.to_parquet` when reading a remote file with ``engine="pyarrow"`` (:issue:`51609`)
- Performance improvement in :meth:`MultiIndex.sortlevel` when ``ascending`` is a list (:issue:`51612`)
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`)
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.fillna` when array does not contain nulls (:issue:`51635`)
- Performance improvement when parsing strings to ``boolean[pyarrow]`` dtype (:issue:`51730`)
- Performance improvement when searching an :class:`Index` sliced from other indexes (:issue:`51738`)
- Performance improvement in :meth:`Series.combine_first` (:issue:`51777`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,10 @@ def fillna(
) -> Self:
value, method = validate_fillna_kwargs(value, method)

if not self._hasna:
# TODO(CoW): Not necessary anymore when CoW is the default
return self.copy()

if limit is not None:
return super().fillna(value=value, method=method, limit=limit)

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pandas import (
NA,
ArrowDtype,
DataFrame,
Interval,
NaT,
Expand Down Expand Up @@ -286,6 +287,9 @@ def test_fillna_ea_noop_shares_memory(
if using_copy_on_write:
assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
assert not df2._mgr._has_no_reference(1)
elif isinstance(df.dtypes[0], ArrowDtype):
# arrow is immutable, so no-ops do not need to copy underlying array
assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
else:
assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))

Expand Down Expand Up @@ -313,6 +317,9 @@ def test_fillna_inplace_ea_noop_shares_memory(
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
assert not df._mgr._has_no_reference(1)
assert not view._mgr._has_no_reference(1)
elif isinstance(df.dtypes[0], ArrowDtype):
# arrow is immutable, so no-ops do not need to copy underlying array
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
else:
assert not np.shares_memory(get_array(df, "b"), get_array(view, "b"))
df.iloc[0, 1] = 100
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,8 +691,8 @@ def test_fillna_no_op_returns_copy(self, data):
result = data.fillna(valid)
assert result is not data
self.assert_extension_array_equal(result, data)
with tm.assert_produces_warning(PerformanceWarning):
result = data.fillna(method="backfill")

result = data.fillna(method="backfill")
assert result is not data
self.assert_extension_array_equal(result, data)

Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,7 @@ def test_fillna_no_op_returns_copy(self, data):
assert result is not data
self.assert_extension_array_equal(result, data)

with tm.maybe_produces_warning(
PerformanceWarning, data.dtype.storage == "pyarrow"
):
result = data.fillna(method="backfill")
result = data.fillna(method="backfill")
assert result is not data
self.assert_extension_array_equal(result, data)

Expand Down