Skip to content

Commit 04ec93e

Browse files
lukemanley and phofl authored
PERF: ArrowExtensionArray.fillna when array does not contain any nulls (#51635)
* PERF: ArrowExtensionArray.fillna when array does not have any nulls
* whatsnew
* fix test
* fix test
* Update array.py
* adjust CoW tests for arrow dtypes

---------

Co-authored-by: Patrick Hoefler <[email protected]>
1 parent be209b7 commit 04ec93e

File tree

5 files changed

+15
-6
lines changed

5 files changed

+15
-6
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@ Performance improvements
120120
- Performance improvement in :func:`read_parquet` and :meth:`DataFrame.to_parquet` when reading a remote file with ``engine="pyarrow"`` (:issue:`51609`)
121121
- Performance improvement in :meth:`MultiIndex.sortlevel` when ``ascending`` is a list (:issue:`51612`)
122122
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`)
123+
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.fillna` when array does not contain nulls (:issue:`51635`)
123124
- Performance improvement when parsing strings to ``boolean[pyarrow]`` dtype (:issue:`51730`)
124125
- Performance improvement when searching an :class:`Index` sliced from other indexes (:issue:`51738`)
125126
- Performance improvement in :meth:`Series.combine_first` (:issue:`51777`)

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -766,6 +766,10 @@ def fillna(
766766
) -> Self:
767767
value, method = validate_fillna_kwargs(value, method)
768768

769+
if not self._hasna:
770+
# TODO(CoW): Not necessary anymore when CoW is the default
771+
return self.copy()
772+
769773
if limit is not None:
770774
return super().fillna(value=value, method=method, limit=limit)
771775

pandas/tests/copy_view/test_interp_fillna.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
from pandas import (
55
NA,
6+
ArrowDtype,
67
DataFrame,
78
Interval,
89
NaT,
@@ -286,6 +287,9 @@ def test_fillna_ea_noop_shares_memory(
286287
if using_copy_on_write:
287288
assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
288289
assert not df2._mgr._has_no_reference(1)
290+
elif isinstance(df.dtypes[0], ArrowDtype):
291+
# arrow is immutable, so no-ops do not need to copy underlying array
292+
assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
289293
else:
290294
assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
291295

@@ -313,6 +317,9 @@ def test_fillna_inplace_ea_noop_shares_memory(
313317
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
314318
assert not df._mgr._has_no_reference(1)
315319
assert not view._mgr._has_no_reference(1)
320+
elif isinstance(df.dtypes[0], ArrowDtype):
321+
# arrow is immutable, so no-ops do not need to copy underlying array
322+
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
316323
else:
317324
assert not np.shares_memory(get_array(df, "b"), get_array(view, "b"))
318325
df.iloc[0, 1] = 100

pandas/tests/extension/test_arrow.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -691,8 +691,8 @@ def test_fillna_no_op_returns_copy(self, data):
691691
result = data.fillna(valid)
692692
assert result is not data
693693
self.assert_extension_array_equal(result, data)
694-
with tm.assert_produces_warning(PerformanceWarning):
695-
result = data.fillna(method="backfill")
694+
695+
result = data.fillna(method="backfill")
696696
assert result is not data
697697
self.assert_extension_array_equal(result, data)
698698

pandas/tests/extension/test_string.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -165,10 +165,7 @@ def test_fillna_no_op_returns_copy(self, data):
165165
assert result is not data
166166
self.assert_extension_array_equal(result, data)
167167

168-
with tm.maybe_produces_warning(
169-
PerformanceWarning, data.dtype.storage == "pyarrow"
170-
):
171-
result = data.fillna(method="backfill")
168+
result = data.fillna(method="backfill")
172169
assert result is not data
173170
self.assert_extension_array_equal(result, data)
174171

0 commit comments

Comments
 (0)