From d680398380d236b2784efe30eddaba3804946451 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 19 Jun 2023 09:56:00 -0700 Subject: [PATCH 1/8] ENH: EA.fillna copy=True --- pandas/core/arrays/_mixins.py | 18 +++++-- pandas/core/arrays/arrow/array.py | 7 +-- pandas/core/arrays/base.py | 19 ++++++- pandas/core/arrays/interval.py | 11 +++- pandas/core/arrays/masked.py | 21 ++++++-- pandas/core/arrays/period.py | 8 +-- pandas/core/arrays/sparse/array.py | 4 ++ pandas/core/internals/blocks.py | 53 ++++++++++++++++---- pandas/tests/copy_view/test_interp_fillna.py | 15 ++++-- 9 files changed, 125 insertions(+), 31 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 0100c17805d76..b6447176613b4 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -293,7 +293,9 @@ def _fill_mask_inplace( func(self._ndarray.T, limit=limit, mask=mask.T) @doc(ExtensionArray.fillna) - def fillna(self, value=None, method=None, limit: int | None = None) -> Self: + def fillna( + self, value=None, method=None, limit: int | None = None, copy: bool = True + ) -> Self: value, method = validate_fillna_kwargs( value, method, validate_scalar_dict_value=False ) @@ -310,7 +312,9 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self: # TODO: check value is None # (for now) when self.ndim == 2, we assume axis=0 func = missing.get_fill_func(method, ndim=self.ndim) - npvalues = self._ndarray.T.copy() + npvalues = self._ndarray.T + if copy: + npvalues = npvalues.copy() func(npvalues, limit=limit, mask=mask.T) npvalues = npvalues.T @@ -318,14 +322,20 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self: new_values = self._from_backing_data(npvalues) else: # fill with value - new_values = self.copy() + if copy: + new_values = self.copy() + else: + new_values = self[:] new_values[mask] = value else: # We validate the fill_value even if there is nothing to fill if value is not None: self._validate_setitem_value(value) - new_values = self.copy() + if not copy: + new_values = self[:] + else: + new_values = self.copy() return new_values # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0ca136914b614..15dba87d082d4 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -907,6 +907,7 @@ def fillna( value: object | ArrayLike | None = None, method: FillnaOptions | None = None, limit: int | None = None, + copy: bool = True, ) -> Self: value, method = validate_fillna_kwargs(value, method) @@ -915,11 +916,11 @@ def fillna( return self.copy() if limit is not None: - return super().fillna(value=value, method=method, limit=limit) + return super().fillna(value=value, method=method, limit=limit, copy=copy) if method is not None: fallback_performancewarning() - return super().fillna(value=value, method=method, limit=limit) + return super().fillna(value=value, method=method, limit=limit, copy=copy) if isinstance(value, (np.ndarray, ExtensionArray)): # Similar to check_value_size, but we do not mask here since we may @@ -962,7 +963,7 @@ def convert_fill_value(value, pa_type, dtype): # a kernel for duration types. pass - return super().fillna(value=value, method=method, limit=limit) + return super().fillna(value=value, method=method, limit=limit, copy=copy) def isin(self, values) -> npt.NDArray[np.bool_]: # short-circuit to return all False array. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 27eb7994d3ccb..6594938a7238b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -758,6 +758,7 @@ def fillna( value: object | ArrayLike | None = None, method: FillnaOptions | None = None, limit: int | None = None, + copy: bool = True, ) -> Self: """ Fill NA/NaN values using the specified method. @@ -782,6 +783,12 @@ def fillna( maximum number of entries along the entire axis where NaNs will be filled. + copy : bool, default True + Whether to make a copy of the data before filling. If False, then + the original should be modified and no new memory should be allocated. + For ExtensionArray subclasses that cannot do this, it is at the + author's discretion whether to ignore "copy=False" or to raise. + Returns ------- ExtensionArray @@ -802,12 +809,20 @@ def fillna( npvalues = self.astype(object) func(npvalues, limit=limit, mask=mask) new_values = self._from_sequence(npvalues, dtype=self.dtype) + if not copy: + self[:] = new_values else: # fill with value - new_values = self.copy() + if not copy: + new_values = self[:] + else: + new_values = self.copy() new_values[mask] = value else: - new_values = self.copy() + if not copy: + new_values = self[:] + else: + new_values = self.copy() return new_values def dropna(self) -> Self: diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 7f874a07341eb..d55370fd55b2a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -886,7 +886,9 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr indexer = obj.argsort()[-1] return obj[indexer] - def fillna(self, value=None, method=None, limit: int | None = None) -> Self: + def fillna( + self, value=None, method=None, limit: int | None = None, copy: bool = True + ) -> Self: """ Fill NA/NaN values using the specified method. @@ -908,6 +910,11 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self: be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. + copy : bool, default True + Whether to make a copy of the data before filling. If False, then + the original should be modified and no new memory should be allocated. + For ExtensionArray subclasses that cannot do this, it is at the + author's discretion whether to ignore "copy=False" or to raise. Returns ------- @@ -917,6 +924,8 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self: raise TypeError("Filling by method is not supported for IntervalArray.") if limit is not None: raise TypeError("limit is not supported for IntervalArray.") + if copy is False: + raise NotImplementedError value_left, value_right = self._validate_scalar(value) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 15c485cbb1499..3c86400d97b84 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -179,7 +179,9 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self._simple_new(self._data[item], newmask) @doc(ExtensionArray.fillna) - def fillna(self, value=None, method=None, limit: int | None = None) -> Self: + def fillna( + self, value=None, method=None, limit: int | None = None, copy: bool = True + ) -> Self: value, method = validate_fillna_kwargs(value, method) mask = self._mask @@ -189,16 +191,25 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self: if mask.any(): if method is not None: func = missing.get_fill_func(method, ndim=self.ndim) - npvalues = self._data.copy().T - new_mask = mask.copy().T + npvalues = self._data.T + new_mask = mask.T + if copy: + npvalues = npvalues.copy() + new_mask = new_mask.copy() func(npvalues, limit=limit, mask=new_mask) return self._simple_new(npvalues.T, new_mask.T) else: # fill with value - new_values = self.copy() + if copy: + new_values = self.copy() + else: + new_values = self[:] new_values[mask] = value else: - new_values = self.copy() + if copy: + new_values = self.copy() + else: + new_values = self[:] return new_values @classmethod diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 266dda52c6d0d..1baa2b035d81d 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -781,16 +781,18 @@ def searchsorted( m8arr = self._ndarray.view("M8[ns]") return m8arr.searchsorted(npvalue, side=side, sorter=sorter) - def fillna(self, value=None, method=None, limit: int | None = None) -> Self: + def fillna( + self, value=None, method=None, limit: int | None = None, copy: bool = True + ) -> Self: if method is not None: # view as dt64 so we get treated as timelike in core.missing, # similar to dtl._period_dispatch dta = self.view("M8[ns]") - result = dta.fillna(value=value, method=method, limit=limit) + result = dta.fillna(value=value, method=method, limit=limit, copy=copy) # error: Incompatible return value type (got "Union[ExtensionArray, # ndarray[Any, Any]]", expected "PeriodArray") return result.view(self.dtype) # type: ignore[return-value] - return super().fillna(value=value, method=method, limit=limit) + return super().fillna(value=value, method=method, limit=limit, copy=copy) # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 269b7a086de93..0d363fb0dd9c8 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -722,6 +722,7 @@ def fillna( value=None, method: FillnaOptions | None = None, limit: int | None = None, + copy: bool = True, ) -> Self: """ Fill missing values with `value`. @@ -739,6 +740,9 @@ def fillna( limit : int, optional + copy: bool, default True + Ignored for SparseArray. + Returns ------- SparseArray diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 981e29df2c323..9370a8440832b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1850,12 +1850,24 @@ def interpolate( **kwargs, ): values = self.values + refs = None + copy = not inplace + if using_cow: + if inplace and not self.refs.has_reference(): + refs = self.refs + else: + copy = True + if values.ndim == 2 and axis == 0: # NDArrayBackedExtensionArray.fillna assumes axis=1 - new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T + new_values = values.T.fillna( + value=fill_value, method=method, limit=limit, copy=copy + ).T else: - new_values = values.fillna(value=fill_value, method=method, limit=limit) - return self.make_block_same_class(new_values) + new_values = values.fillna( + value=fill_value, method=method, limit=limit, copy=copy + ) + return self.make_block_same_class(new_values, refs=refs) class ExtensionBlock(libinternals.Block, EABackedBlock): @@ -1898,7 +1910,16 @@ def fillna( new_values = self.values else: refs = None - new_values = self.values.fillna(value=value, method=None, limit=limit) + copy = not inplace + if using_cow: + if inplace and not self.refs.has_reference(): + refs = self.refs + else: + copy = True + + new_values = self.values.fillna( + value=value, method=None, limit=limit, copy=copy + ) nb = self.make_block_same_class(new_values, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow=using_cow) @@ -2285,13 +2306,25 @@ def interpolate( **kwargs, ) return self.make_block_same_class(new_values, refs=refs) - - elif values.ndim == 2 and axis == 0: - # NDArrayBackedExtensionArray.fillna assumes axis=1 - new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T else: - new_values = values.fillna(value=fill_value, method=method, limit=limit) - return self.make_block_same_class(new_values) + copy = not inplace + refs = None + if using_cow: + if inplace and not self.refs.has_reference(): + refs = self.refs + else: + copy = True + + if values.ndim == 2 and axis == 0: + # NDArrayBackedExtensionArray.fillna assumes axis=1 + new_values = values.T.fillna( + value=fill_value, method=method, limit=limit, copy=copy + ).T + else: + new_values = values.fillna( + value=fill_value, method=method, limit=limit, copy=copy + ) + return self.make_block_same_class(new_values, refs=refs) class DatetimeTZBlock(DatetimeLikeBlock): diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 71023a538a2c3..3f3cc14e65769 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -326,7 +326,11 @@ def test_fillna_inplace_ea_noop_shares_memory( view = df[:] df.fillna(100, inplace=True) - assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) + if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: + assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) + else: + # MaskedArray can actually respect inplace=True + assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) if using_copy_on_write: assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) @@ -336,6 +340,11 @@ def test_fillna_inplace_ea_noop_shares_memory( # arrow is immutable, so no-ops do not need to copy underlying array assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) else: - assert not np.shares_memory(get_array(df, "b"), get_array(view, "b")) + # MaskedArray can actually respect inplace=True + assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) df.iloc[0, 1] = 100 - tm.assert_frame_equal(df_orig, view) + if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: + tm.assert_frame_equal(df_orig, view) + else: + # we actually have a view + tm.assert_frame_equal(df, view) From 98fafd7b476cc75c1d60ccaf84ed0ca4b669563e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Jun 2023 16:05:48 -0700 Subject: [PATCH 2/8] simplify test --- pandas/tests/copy_view/test_interp_fillna.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 3f3cc14e65769..7acfd254b010f 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -336,12 +336,11 @@ def test_fillna_inplace_ea_noop_shares_memory( assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) assert not df._mgr._has_no_reference(1) assert not view._mgr._has_no_reference(1) - elif isinstance(df.dtypes.iloc[0], ArrowDtype): - # arrow is immutable, so no-ops do not need to copy underlying array - assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) else: + # arrow is immutable, so no-ops do not need to copy underlying array # MaskedArray can actually respect inplace=True assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) + df.iloc[0, 1] = 100 if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: tm.assert_frame_equal(df_orig, view) From 34731b1baf121166371d0a56ef6e8eea8806f3cf Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Jun 2023 09:32:00 -0700 Subject: [PATCH 3/8] simplify --- pandas/tests/copy_view/test_interp_fillna.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index d35fbfc65e1d2..c8eb6546b904c 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -335,14 +335,10 @@ def test_fillna_inplace_ea_noop_shares_memory( # MaskedArray can actually respect inplace=True assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) assert not df._mgr._has_no_reference(1) assert not view._mgr._has_no_reference(1) - else: - # arrow is immutable, so no-ops do not need to copy underlying array - # MaskedArray can actually respect inplace=True - assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) df.iloc[0, 1] = 100 if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: From c09fd75cf5248148308db92bbe147882087e5336 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Jun 2023 13:29:46 -0700 Subject: [PATCH 4/8] Ignore copy in base class, test for 3rd party EA that hasnt implemented --- pandas/core/arrays/base.py | 4 +- pandas/core/internals/blocks.py | 45 ++++++++++++++++--- pandas/tests/extension/decimal/array.py | 10 +++++ .../tests/extension/decimal/test_decimal.py | 40 ++++++++++++++++- 4 files changed, 90 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index bdf34fc467590..b90c77e71ee68 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -788,6 +788,8 @@ def fillna( the original should be modified and no new memory should be allocated. For ExtensionArray subclasses that cannot do this, it is at the author's discretion whether to ignore "copy=False" or to raise. + The base class implementation ignores the keyword in pad/backfill + cases. Returns ------- @@ -809,8 +811,6 @@ def fillna( npvalues = self.astype(object) func(npvalues, limit=limit, mask=mask) new_values = self._from_sequence(npvalues, dtype=self.dtype) - if not copy: - self[:] = new_values else: # fill with value if not copy: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bae7e517d640b..2b19c48d6bde7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -12,6 +12,7 @@ cast, final, ) +import warnings import numpy as np @@ -42,6 +43,7 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.astype import ( @@ -1910,9 +1912,25 @@ def interpolate( value=fill_value, method=method, limit=limit, copy=copy ).T else: - new_values = values.fillna( - value=fill_value, method=method, limit=limit, copy=copy - ) + try: + new_values = values.fillna( + value=fill_value, method=method, limit=limit, copy=copy + ) + except TypeError: + # 3rd party EA that has not implemented copy keyword yet + warnings.warn( + # GH#53278 + "ExtensionArray.fillna added a 'copy' keyword in pandas " + "2.1.0. In a future version, ExtensionArray subclasses will " + "need to implement this keyword or an exception will be " + "raised. In the interim, the keyword is ignored by " + f"{type(self.values).__name__}.", + FutureWarning, + stacklevel=find_stack_level(), + ) + refs = None + new_values = values.fillna(value=fill_value, method=method, limit=limit) + return self.make_block_same_class(new_values, refs=refs) @@ -1963,9 +1981,24 @@ def fillna( else: copy = True - new_values = self.values.fillna( - value=value, method=None, limit=limit, copy=copy - ) + try: + new_values = self.values.fillna( + value=value, method=None, limit=limit, copy=copy + ) + except TypeError: + # 3rd party EA that has not implemented copy keyword yet + warnings.warn( + # GH#53278 + "ExtensionArray.fillna added a 'copy' keyword in pandas " + "2.1.0. In a future version, ExtensionArray subclasses will " + "need to implement this keyword or an exception will be " + "raised. In the interim, the keyword is ignored by " + f"{type(self.values).__name__}.", + FutureWarning, + stacklevel=find_stack_level(), + ) + refs = None + new_values = self.values.fillna(value=value, method=None, limit=limit) nb = self.make_block_same_class(new_values, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow=using_cow) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 393c01488c234..564cff0f08843 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -276,6 +276,16 @@ def convert_values(param): def value_counts(self, dropna: bool = True): return value_counts(self.to_numpy(), dropna=dropna) + # Simulate a 3rd-party EA that has not yet updated to include a "copy" + # keyword in its fillna method. + def fillna( + self, + value=None, + method=None, + limit: int | None = None, + ): + return super().fillna(value=value, method=method, limit=limit, copy=True) + def to_decimal(values, context=None): return DecimalArray([decimal.Decimal(x) for x in values], context=context) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index afd04817f05c7..e525ee82a3044 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -97,7 +97,45 @@ class TestIndex(base.BaseIndexTests): class TestMissing(base.BaseMissingTests): - pass + def test_fillna_frame(self, data_missing): + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_frame(data_missing) + + def test_fillna_limit_pad(self, data_missing): + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_limit_pad(data_missing) + + def test_fillna_limit_backfill(self, data_missing): + msg = "|".join( + [ + "ExtensionArray.fillna added a 'copy' keyword", + "Series.fillna with 'method' is deprecated", + ] + ) + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_limit_backfill(data_missing) + + def test_fillna_series(self, data_missing): + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_series(data_missing) + + def test_fillna_series_method(self, data_missing, fillna_method): + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_series_method(data_missing, fillna_method) class Reduce: From 89f95a2fb5fc55a5b56e4f7ef06819d3c65fa139 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 26 Jun 2023 15:37:43 -0700 Subject: [PATCH 5/8] fix tests --- pandas/core/internals/blocks.py | 15 ++++++++++----- pandas/tests/extension/decimal/array.py | 3 ++- pandas/tests/extension/decimal/test_decimal.py | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2b19c48d6bde7..53a48c67b97a5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1354,7 +1354,7 @@ def pad_or_backfill( inplace: bool = False, limit: int | None = None, limit_direction: Literal["forward", "backward", "both"] = "forward", - limit_area: str | None = None, + limit_area: Literal["inside", "outside"] | None = None, fill_value: Any | None = None, downcast: Literal["infer"] | None = None, using_cow: bool = False, @@ -1918,6 +1918,10 @@ def interpolate( ) except TypeError: # 3rd party EA that has not implemented copy keyword yet + refs = None + new_values = values.fillna(value=fill_value, method=method, limit=limit) + # issue the warning *after* retrying, in case the TypeError + # was caused by an invalid fill_value warnings.warn( # GH#53278 "ExtensionArray.fillna added a 'copy' keyword in pandas " @@ -1928,8 +1932,6 @@ def interpolate( FutureWarning, stacklevel=find_stack_level(), ) - refs = None - new_values = values.fillna(value=fill_value, method=method, limit=limit) return self.make_block_same_class(new_values, refs=refs) @@ -1987,6 +1989,10 @@ def fillna( ) except TypeError: # 3rd party EA that has not implemented copy keyword yet + refs = None + new_values = self.values.fillna(value=value, method=None, limit=limit) + # issue the warning *after* retrying, in case the TypeError + # was caused by an invalid fill_value warnings.warn( # GH#53278 "ExtensionArray.fillna added a 'copy' keyword in pandas " @@ -1997,8 +2003,7 @@ def fillna( FutureWarning, stacklevel=find_stack_level(), ) - refs = None - new_values = self.values.fillna(value=value, method=None, limit=limit) + nb = self.make_block_same_class(new_values, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow=using_cow) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 564cff0f08843..a264848a08e3d 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -278,7 +278,8 @@ def value_counts(self, dropna: bool = True): # Simulate a 3rd-party EA that has not yet updated to include a "copy" # keyword in its fillna method. - def fillna( + # error: Signature of "fillna" incompatible with supertype "ExtensionArray" + def fillna( # type: ignore[override] self, value=None, method=None, diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index e525ee82a3044..f8c03394efab2 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -163,6 +163,20 @@ class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): class TestMethods(base.BaseMethodsTests): + def test_fillna_copy_frame(self, data_missing): + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_copy_frame(data_missing) + + def test_fillna_copy_series(self, data_missing): + msg = "ExtensionArray.fillna added a 'copy' keyword" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_copy_series(data_missing) + @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): all_data = all_data[:10] From dff594f9b5b250accbfe7bede0d6f756509c4bfe Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Jun 2023 07:48:18 -0700 Subject: [PATCH 6/8] fix CoW test --- pandas/tests/extension/decimal/test_decimal.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index f8c03394efab2..b5ab208d263aa 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -163,18 +163,16 @@ class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): class TestMethods(base.BaseMethodsTests): - def test_fillna_copy_frame(self, data_missing): + def test_fillna_copy_frame(self, data_missing, using_copy_on_write): + warn = FutureWarning if not using_copy_on_write else None msg = "ExtensionArray.fillna added a 'copy' keyword" - with tm.assert_produces_warning( - FutureWarning, match=msg, check_stacklevel=False - ): + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): super().test_fillna_copy_frame(data_missing) - def test_fillna_copy_series(self, data_missing): + def test_fillna_copy_series(self, data_missing, using_copy_on_write): + warn = FutureWarning if not using_copy_on_write else None msg = "ExtensionArray.fillna added a 'copy' keyword" - with tm.assert_produces_warning( - FutureWarning, match=msg, check_stacklevel=False - ): + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): super().test_fillna_copy_series(data_missing) @pytest.mark.parametrize("dropna", [True, False]) From 3d21ea49753b71d44afde251fcdddca6abdbd03d Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 9 Jul 2023 13:08:21 -0700 Subject: [PATCH 7/8] fix axis --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index af69a0dbcb6b3..b0e0cfc41498e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1889,7 +1889,7 @@ def pad_or_backfill( values = self.values copy, refs = self._get_refs_and_copy(using_cow, inplace) - if values.ndim == 2 and axis == 0: + if values.ndim == 2 and axis == 1: # NDArrayBackedExtensionArray.fillna assumes axis=1 new_values = values.T.fillna(method=method, limit=limit, copy=copy).T else: From 3446312db04a25505f241459015b99791368249d Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Jul 2023 12:15:12 -0700 Subject: [PATCH 8/8] typo fixup --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b0e0cfc41498e..131ec409ab3ba 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1890,7 +1890,7 @@ def pad_or_backfill( copy, refs = self._get_refs_and_copy(using_cow, inplace) if values.ndim == 2 and axis == 1: - # NDArrayBackedExtensionArray.fillna assumes axis=1 + # NDArrayBackedExtensionArray.fillna assumes axis=0 new_values = values.T.fillna(method=method, limit=limit, copy=copy).T else: try: