diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 916bcf3db9a4a..9fde24ba89891 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -710,6 +710,7 @@ Missing - Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`) - Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`??`) - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`) +- Bug in :meth:`Index.fillna` incorrectly returning an un-filled :class:`Index` when NA values are present and ``downcast`` argument is specified. This now raises ``NotImplementedError`` instead; do not pass ``downcast`` argument (:issue:`44873`) - MultiIndex diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 54a13c124c13a..3ec17bd7a3583 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2723,13 +2723,18 @@ def fillna(self, value=None, downcast=None): DataFrame.fillna : Fill NaN values of a DataFrame. Series.fillna : Fill NaN Values of a Series. """ + value = self._require_scalar(value) if self.hasnans: result = self.putmask(self._isnan, value) if downcast is None: # no need to care metadata other than name - # because it can't have freq if + # because it can't have freq if it has NaTs return Index._with_infer(result, name=self.name) + raise NotImplementedError( + f"{type(self).__name__}.fillna does not support 'downcast' " + "argument values other than 'None'." + ) return self._view() def dropna(self: _IndexT, how: str_t = "any") -> _IndexT: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f26a24c38b19f..9ef43c740d602 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -377,20 +377,6 @@ def __contains__(self, key: Any) -> bool: return contains(self, key, container=self._engine) - @doc(Index.fillna) - def fillna(self, value, downcast=None): - value = self._require_scalar(value) - try: - cat = self._data.fillna(value) - except (ValueError, TypeError): - # invalid fill_value - if not self.hasnans: - # nothing to fill, we can get away without casting - return self.copy() - return self.astype(object).fillna(value, downcast=downcast) - - return type(self)._simple_new(cat, name=self.name) - # TODO(2.0): remove reindex once non-unique deprecation is enforced def reindex( self, target, method=None, level=None, limit=None, tolerance=None diff --git a/pandas/tests/indexes/categorical/test_fillna.py b/pandas/tests/indexes/categorical/test_fillna.py index 400f236fcf803..09de578f3c649 100644 --- a/pandas/tests/indexes/categorical/test_fillna.py +++ b/pandas/tests/indexes/categorical/test_fillna.py @@ -25,17 +25,19 @@ def test_fillna_categorical(self): tm.assert_index_equal(result, expected) def test_fillna_copies_with_no_nas(self): - # Nothing to fill, should still get a copy + # Nothing to fill, should still get a copy for the Categorical method, + # but OK to get a view on CategoricalIndex method ci = CategoricalIndex([0, 1, 1]) - cat = ci._data result = ci.fillna(0) - assert result._values._ndarray is not cat._ndarray - assert result._values._ndarray.base is None + assert result is not ci + assert tm.shares_memory(result, ci) - # Same check directly on the Categorical object + # But at the EA level we always get a copy. + cat = ci._data result = cat.fillna(0) assert result._ndarray is not cat._ndarray assert result._ndarray.base is None + assert not tm.shares_memory(result, cat) def test_fillna_validates_with_no_nas(self): # We validate the fill value even if fillna is a no-op diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 2c8873acd8303..9dee9fe288ed8 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -516,6 +516,11 @@ def test_fillna(self, index): idx = type(index)(values) + msg = "does not support 'downcast'" + with pytest.raises(NotImplementedError, match=msg): + # For now at least, we only raise if there are NAs present + idx.fillna(idx[0], downcast="infer") + expected = np.array([False] * len(idx), dtype=bool) expected[1] = True tm.assert_numpy_array_equal(idx._isnan, expected)