From d3ae0cf1c4a28ec549e48f65937ed50e2be86218 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 21 Feb 2021 12:07:43 +0000 Subject: [PATCH 1/8] PERF: avoid object conversion in fillna(method=pad|backfill) for masked arrays --- asv_bench/benchmarks/frame_methods.py | 24 ++++++++++++---- pandas/_libs/algos.pyx | 32 +++++++++++++++++++++ pandas/core/arrays/masked.py | 41 ++++++++++++++++++++++++++- pandas/core/missing.py | 32 ++++++++++++++++----- 4 files changed, 115 insertions(+), 14 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index bd068cec4641b..52db81c8329a6 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -351,15 +351,27 @@ def time_isnull_obj(self): class Fillna: - params = ([True, False], ["pad", "bfill"]) - param_names = ["inplace", "method"] - - def setup(self, inplace, method): + params = ( + [True, False], + ["pad", "bfill"], + [ + "float64", + "float32", + "object", + "Int64", + "Float64", + ], + ) + param_names = ["inplace", "method", "dtype"] + + def setup(self, inplace, method, dtype): values = np.random.randn(10000, 100) values[::2] = np.nan - self.df = DataFrame(values) + if dtype == "Int64": + values = values.round() + self.df = DataFrame(values, dtype=dtype) - def time_frame_fillna(self, inplace, method): + def time_frame_fillna(self, inplace, method, dtype): self.df.fillna(inplace=inplace, method=method) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 1a1b263ae356e..cc225186c6b37 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -623,6 +623,38 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): val = values[i] +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_inplace_masked(algos_t[:] values, uint8_t[:] mask, limit=None): + cdef: + Py_ssize_t i, N + algos_t val + uint8_t prev_mask + int lim, fill_count = 0 + + N = len(values) + + # GH#2778 + if N == 0: + return + + lim = validate_limit(N, limit) + + val = values[0] + prev_mask = mask[0] + for i in range(1, N): + if mask[i]: + if fill_count >= lim: + continue + fill_count += 1 + values[i] = val + mask[i] = prev_mask + else: + fill_count = 0 + val = values[i] + prev_mask = mask[i] + + @cython.boundscheck(False) @cython.wraparound(False) def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None): diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index bae14f4e560c2..ebe63003760f7 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -28,6 +28,7 @@ cache_readonly, doc, ) +from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( @@ -38,12 +39,16 @@ is_string_dtype, pandas_dtype, ) +from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import ( isna, notna, ) -from pandas.core import nanops +from pandas.core import ( + missing, + nanops, +) from pandas.core.algorithms import ( factorize_array, isin, @@ -144,6 +149,40 @@ def __getitem__( return type(self)(self._data[item], self._mask[item]) + @doc(ExtensionArray.fillna) + def fillna( + self: BaseMaskedArrayT, value=None, method=None, limit=None + ) -> BaseMaskedArrayT: + value, method = validate_fillna_kwargs(value, method) + + mask = self._mask + + if is_array_like(value): + if len(value) != len(self): + raise ValueError( + f"Length of 'value' does not match. Got ({len(value)}) " + f" expected {len(self)}" + ) + value = value[mask] + + if mask.any(): + if method is not None: + func = missing.get_fill_func(method) + new_values, new_mask = func( + self._data.copy(), + limit=limit, + mask=mask.copy(), + update_mask=True, + ) + return type(self)(new_values, new_mask.view(np.bool_)) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values + def _coerce_to_array(self, values) -> Tuple[np.ndarray, np.ndarray]: raise AbstractMethodError(self) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 9ae5f7d1b7497..b57554a3ec34f 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -692,19 +692,37 @@ def _fillna_prep(values, mask=None): return values, mask -def _pad_1d(values, limit=None, mask=None): +def _pad_1d( + values: np.ndarray, + limit: int | None = None, + mask: np.ndarray | None = None, + update_mask: bool = False, +) -> np.ndarray | tuple[np.ndarray, np.ndarray]: values, mask = _fillna_prep(values, mask) - algos.pad_inplace(values, mask, limit=limit) - return values + if update_mask: + algos.pad_inplace_masked(values, mask, limit=limit) + return values, mask + else: + algos.pad_inplace(values, mask, limit=limit) + return values -def _backfill_1d(values, limit=None, mask=None): +def _backfill_1d( + values: np.ndarray, + limit: int | None = None, + mask: np.ndarray | None = None, + update_mask: bool = False, +) -> np.ndarray | tuple[np.ndarray, np.ndarray]: values, mask = _fillna_prep(values, mask) - algos.backfill_inplace(values, mask, limit=limit) - return values + if update_mask: + algos.pad_inplace_masked(values[::-1], mask[::-1], limit=limit) + return values, mask + else: + algos.backfill_inplace(values, mask, limit=limit) + return values -def _pad_2d(values, limit=None, mask=None): +def _pad_2d(values, limit=None, mask=None, update_mask: bool = False): values, mask = _fillna_prep(values, mask) if np.all(values.shape): From 0b4d283207f4ced1d2ecaf4d8c7a5fa535c9631d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 21 Feb 2021 14:10:45 +0000 Subject: [PATCH 2/8] revert rogue change --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index b57554a3ec34f..c7eb3448ad8bb 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -722,7 +722,7 @@ def _backfill_1d( return values -def _pad_2d(values, limit=None, mask=None, update_mask: bool = False): +def _pad_2d(values, limit=None, mask=None): values, mask = _fillna_prep(values, mask) if np.all(values.shape): From 9024d950eabe9f85f6a0fd0f08ff28b775c451dc Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 21 Feb 2021 20:41:30 +0000 Subject: [PATCH 3/8] always update and return mask --- pandas/_libs/algos.pyx | 38 ++++++------------------------ pandas/core/arrays/_mixins.py | 2 +- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/masked.py | 1 - pandas/core/arrays/string_arrow.py | 2 +- pandas/core/missing.py | 22 +++++------------ pandas/core/series.py | 2 +- 7 files changed, 17 insertions(+), 52 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index cc225186c6b37..5783d3c2353aa 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -597,35 +597,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): - cdef: - Py_ssize_t i, N - algos_t val - int lim, fill_count = 0 - - N = len(values) - - # GH#2778 - if N == 0: - return - - lim = validate_limit(N, limit) - - val = values[0] - for i in range(N): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_inplace_masked(algos_t[:] values, uint8_t[:] mask, limit=None): +def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None): cdef: Py_ssize_t i, N algos_t val @@ -642,7 +614,7 @@ def pad_inplace_masked(algos_t[:] values, uint8_t[:] mask, limit=None): val = values[0] prev_mask = mask[0] - for i in range(1, N): + for i in range(N): if mask[i]: if fill_count >= lim: continue @@ -771,10 +743,11 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray: @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): +def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None): cdef: Py_ssize_t i, N algos_t val + uint8_t prev_mask int lim, fill_count = 0 N = len(values) @@ -786,15 +759,18 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): lim = validate_limit(N, limit) val = values[N - 1] + prev_mask = mask[N - 1] for i in range(N - 1, -1, -1): if mask[i]: if fill_count >= lim: continue fill_count += 1 values[i] = val + mask[i] = prev_mask else: fill_count = 0 val = values[i] + prev_mask = mask[i] @cython.boundscheck(False) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 825757ddffee4..cf40d731c4395 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -288,7 +288,7 @@ def fillna( if mask.any(): if method is not None: func = missing.get_fill_func(method) - new_values = func(self._ndarray.copy(), limit=limit, mask=mask) + new_values, _ = func(self._ndarray.copy(), limit=limit, mask=mask) # TODO: PandasArray didn't used to copy, need tests for this new_values = self._from_backing_data(new_values) else: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index edc8fa14ca142..da3868f170a58 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -708,7 +708,7 @@ def fillna(self, value=None, method=None, limit=None): if mask.any(): if method is not None: func = get_fill_func(method) - new_values = func(self.astype(object), limit=limit, mask=mask) + new_values, _ = func(self.astype(object), limit=limit, mask=mask) new_values = self._from_sequence(new_values, dtype=self.dtype) else: # fill with value diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index ebe63003760f7..329f50a19d6ed 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -172,7 +172,6 @@ def fillna( self._data.copy(), limit=limit, mask=mask.copy(), - update_mask=True, ) return type(self)(new_values, new_mask.view(np.bool_)) else: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index e2b0ad372bf88..7182228b38a0e 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -392,7 +392,7 @@ def fillna(self, value=None, method=None, limit=None): if mask.any(): if method is not None: func = get_fill_func(method) - new_values = func(self.to_numpy(object), limit=limit, mask=mask) + new_values, _ = func(self.to_numpy(object), limit=limit, mask=mask) new_values = self._from_sequence(new_values) else: # fill with value diff --git a/pandas/core/missing.py b/pandas/core/missing.py index c7eb3448ad8bb..7dd06ad33609d 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -696,30 +696,20 @@ def _pad_1d( values: np.ndarray, limit: int | None = None, mask: np.ndarray | None = None, - update_mask: bool = False, -) -> np.ndarray | tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: values, mask = _fillna_prep(values, mask) - if update_mask: - algos.pad_inplace_masked(values, mask, limit=limit) - return values, mask - else: - algos.pad_inplace(values, mask, limit=limit) - return values + algos.pad_inplace(values, mask, limit=limit) + return values, mask def _backfill_1d( values: np.ndarray, limit: int | None = None, mask: np.ndarray | None = None, - update_mask: bool = False, -) -> np.ndarray | tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: values, mask = _fillna_prep(values, mask) - if update_mask: - algos.pad_inplace_masked(values[::-1], mask[::-1], limit=limit) - return values, mask - else: - algos.backfill_inplace(values, mask, limit=limit) - return values + algos.backfill_inplace(values, mask, limit=limit) + return values, mask def _pad_2d(values, limit=None, mask=None): diff --git a/pandas/core/series.py b/pandas/core/series.py index 34e9464006b30..d8a250504f949 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4515,7 +4515,7 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit): fill_f = missing.get_fill_func(method) mask = missing.mask_missing(result.values, to_replace) - values = fill_f(result.values, limit=limit, mask=mask) + values, _ = fill_f(result.values, limit=limit, mask=mask) if values.dtype == orig_dtype and inplace: return From 5955dad24b7dbd5742992a6931a0893825e7717c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Mar 2021 16:13:17 +0000 Subject: [PATCH 4/8] post merge fix-up --- pandas/core/missing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e9228b264ae47..1b5a7237b5287 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -660,9 +660,9 @@ def interpolate_2d( method = clean_fill_method(method) tvalues = transf(values) if method == "pad": - result = _pad_2d(tvalues, limit=limit) + result, _ = _pad_2d(tvalues, limit=limit) else: - result = _backfill_2d(tvalues, limit=limit) + result, _ = _backfill_2d(tvalues, limit=limit) result = transf(result) # reshape back @@ -698,8 +698,8 @@ def new_func(values, limit=None, mask=None): # This needs to occur before casting to int64 mask = isna(values) - result = func(values.view("i8"), limit=limit, mask=mask) - return result.view(values.dtype) + result, mask = func(values.view("i8"), limit=limit, mask=mask) + return result.view(values.dtype), mask return func(values, limit=limit, mask=mask) @@ -737,7 +737,7 @@ def _pad_2d(values, limit=None, mask=None): else: # for test coverage pass - return values + return values, mask @_datetimelike_compat @@ -749,7 +749,7 @@ def _backfill_2d(values, limit=None, mask=None): else: # for test coverage pass - return values + return values, mask _fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} From 245d99c1f9afca299a4cfef887ea479a19e0bf8c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Mar 2021 13:05:22 +0000 Subject: [PATCH 5/8] add dt benchmark --- asv_bench/benchmarks/frame_methods.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 52db81c8329a6..bd7bffa6094cd 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -360,16 +360,30 @@ class Fillna: "object", "Int64", "Float64", + "datetime64[ns]", + "datetime64[ns, tz]", ], ) param_names = ["inplace", "method", "dtype"] def setup(self, inplace, method, dtype): - values = np.random.randn(10000, 100) - values[::2] = np.nan - if dtype == "Int64": - values = values.round() - self.df = DataFrame(values, dtype=dtype) + if dtype in ("datetime64[ns]", "datetime64[ns, tz]"): + N = 10000 + M = 100 + data = { + "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N), + "datetime64[ns, tz]": date_range( + "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" + ), + } + self.df = DataFrame({f"col_{i}": data[dtype] for i in range(M)}) + self.df[::2] = None + else: + values = np.random.randn(10000, 100) + values[::2] = np.nan + if dtype == "Int64": + values = values.round() + self.df = DataFrame(values, dtype=dtype) def time_frame_fillna(self, inplace, method, dtype): self.df.fillna(inplace=inplace, method=method) From 9f10cf2a8c873e1b2f6402817b3520a86e3e43e4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Mar 2021 13:23:20 +0000 Subject: [PATCH 6/8] add td benchmark --- asv_bench/benchmarks/frame_methods.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index bd7bffa6094cd..33f31070bcbe6 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -11,6 +11,7 @@ date_range, isnull, period_range, + timedelta_range, ) from .pandas_vb_common import tm @@ -362,24 +363,25 @@ class Fillna: "Float64", "datetime64[ns]", "datetime64[ns, tz]", + "timedelta64[ns]", ], ) param_names = ["inplace", "method", "dtype"] def setup(self, inplace, method, dtype): - if dtype in ("datetime64[ns]", "datetime64[ns, tz]"): - N = 10000 - M = 100 + N, M = 10000, 100 + if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"): data = { "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N), "datetime64[ns, tz]": date_range( "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" ), + "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"), } self.df = DataFrame({f"col_{i}": data[dtype] for i in range(M)}) self.df[::2] = None else: - values = np.random.randn(10000, 100) + values = np.random.randn(N, M) values[::2] = np.nan if dtype == "Int64": values = values.round() From fb26e31bc262ed7177ef43ba2ad0c1e90d37a9cb Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Mar 2021 13:31:09 +0000 Subject: [PATCH 7/8] add release note --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index f7204ceb9d412..e1f5c68dff1d5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -334,6 +334,7 @@ Performance improvements - Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`) - Performance improvement in :meth:`Series.mean` for nullable data types (:issue:`34814`) - Performance improvement in :meth:`Series.isin` for nullable data types (:issue:`38340`) +- Performance improvement in :meth:`DataFrame.fillna` with ``method="pad|backfill"`` for nullable floating and nullable integer dtypes (:issue:`39953`) - Performance improvement in :meth:`DataFrame.corr` for method=kendall (:issue:`28329`) - Performance improvement in :meth:`core.window.rolling.Rolling.corr` and :meth:`core.window.rolling.Rolling.cov` (:issue:`39388`) - Performance improvement in :meth:`core.window.rolling.RollingGroupby.corr`, :meth:`core.window.expanding.ExpandingGroupby.corr`, :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` (:issue:`39591`) From bf1ecabe8e3002327109333d417a6fa6e786433f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 5 Mar 2021 14:55:00 +0000 Subject: [PATCH 8/8] remove redundant copy --- pandas/core/internals/blocks.py | 7 ++----- pandas/tests/extension/base/missing.py | 12 ++++++++++++ pandas/tests/extension/test_interval.py | 4 ++++ pandas/tests/extension/test_numpy.py | 5 +++++ pandas/tests/extension/test_sparse.py | 7 +++++++ 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f2b8499a316b7..e2e4164c3db91 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1734,16 +1734,13 @@ def _slice(self, slicer): def fillna( self, value, limit=None, inplace: bool = False, downcast=None ) -> List[Block]: - values = self.values if inplace else self.values.copy() - values = values.fillna(value=value, limit=limit) + values = self.values.fillna(value=value, limit=limit) return [self.make_block_same_class(values=values)] def interpolate( self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs ): - - values = self.values if inplace else self.values.copy() - new_values = values.fillna(value=fill_value, method=method, limit=limit) + new_values = self.values.fillna(value=fill_value, method=method, limit=limit) return self.make_block_same_class(new_values) def diff(self, n: int, axis: int = 1) -> List[Block]: diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 0cf03533915f2..c501694a7c2d5 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -69,6 +69,18 @@ def test_fillna_limit_backfill(self, data_missing): expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) self.assert_series_equal(result, expected) + def test_fillna_no_op_returns_copy(self, data): + data = data[~data.isna()] + + valid = data[0] + result = data.fillna(valid) + assert result is not data + self.assert_extension_array_equal(result, data) + + result = data.fillna(method="backfill") + assert result is not data + self.assert_extension_array_equal(result, data) + def test_fillna_series(self, data_missing): fill_value = data_missing[1] ser = pd.Series(data_missing) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 1bc06ee4b6397..24c0d619e2b1a 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -132,6 +132,10 @@ def test_fillna_series_method(self): def test_fillna_limit_backfill(self): pass + @unsupported_fill + def test_fillna_no_op_returns_copy(self): + pass + @unsupported_fill def test_fillna_series(self): pass diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 98e173ee23f01..588f0d027439e 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -309,6 +309,11 @@ def test_fillna_scalar(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_scalar(data_missing) + @skip_nested + def test_fillna_no_op_returns_copy(self, data): + # Non-scalar "scalar" values. + super().test_fillna_no_op_returns_copy(data) + @skip_nested def test_fillna_series(self, data_missing): # Non-scalar "scalar" values. diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 067fada5edcae..a49e1b4a367fd 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -221,6 +221,13 @@ def test_fillna_limit_backfill(self, data_missing): with tm.assert_produces_warning(PerformanceWarning): super().test_fillna_limit_backfill(data_missing) + def test_fillna_no_op_returns_copy(self, data, request): + if np.isnan(data.fill_value): + request.node.add_marker( + pytest.mark.xfail(reason="returns array with different fill value") + ) + super().test_fillna_no_op_returns_copy(data) + def test_fillna_series_method(self, data_missing): with tm.assert_produces_warning(PerformanceWarning): super().test_fillna_limit_backfill(data_missing)