diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ff835eb32f6df..29a172dcdd2c7 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -373,7 +373,7 @@ def __init__( # infer categories in a factorization step further below if fastpath: - self._codes = coerce_indexer_dtype(values, dtype.categories) + self._ndarray = coerce_indexer_dtype(values, dtype.categories) self._dtype = self._dtype.update_dtype(dtype) return @@ -450,7 +450,7 @@ def __init__( codes = full_codes self._dtype = self._dtype.update_dtype(dtype) - self._codes = coerce_indexer_dtype(codes, dtype.categories) + self._ndarray = coerce_indexer_dtype(codes, dtype.categories) @property def dtype(self) -> CategoricalDtype: @@ -923,7 +923,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal codes = recode_for_categories( cat.codes, cat.categories, new_dtype.categories ) - cat._codes = codes + cat._ndarray = codes cat._dtype = new_dtype if not inplace: @@ -1096,7 +1096,7 @@ def add_categories(self, new_categories, inplace=False): cat = self if inplace else self.copy() cat._dtype = new_dtype - cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories) + cat._ndarray = coerce_indexer_dtype(cat._ndarray, new_dtype.categories) if not inplace: return cat @@ -1201,7 +1201,7 @@ def remove_unused_categories(self, inplace=no_default): new_categories, ordered=self.ordered ) cat._dtype = new_dtype - cat._codes = coerce_indexer_dtype(inv, new_dtype.categories) + cat._ndarray = coerce_indexer_dtype(inv, new_dtype.categories) if not inplace: return cat @@ -1384,6 +1384,10 @@ def __setstate__(self, state): if "_dtype" not in state: state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"]) + if "_codes" in state and "_ndarray" not in state: + # backward compat, changed what is property vs attribute + state["_ndarray"] = state.pop("_codes") + for k, v in state.items(): setattr(self, k, v) @@ -1785,11 +1789,11 @@ def fillna(self, value=None, method=None, limit=None): # NDArrayBackedExtensionArray compat @property - def _ndarray(self) -> np.ndarray: - return self._codes + def _codes(self) -> np.ndarray: + return self._ndarray def _from_backing_data(self, arr: np.ndarray) -> Categorical: - return self._constructor(arr, dtype=self.dtype, fastpath=True) + return type(self)(arr, dtype=self.dtype, fastpath=True) def _box_func(self, i: int): if i == -1: @@ -1800,7 +1804,7 @@ def _unbox_scalar(self, key) -> int: # searchsorted is very performance sensitive. By converting codes # to same dtype as self.codes, we get much faster performance. code = self.categories.get_loc(key) - code = self._codes.dtype.type(code) + code = self._ndarray.dtype.type(code) return code # ------------------------------------------------------------------ @@ -2162,7 +2166,7 @@ def unique(self): cat = self.copy() # keep nan in codes - cat._codes = unique_codes + cat._ndarray = unique_codes # exclude nan from indexer for categories take_codes = unique_codes[unique_codes != -1] diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 4b2df268f5c1a..8c9caf2e59011 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -351,7 +351,7 @@ def test_engine_type(self, dtype, engine_type): # having 2**32 - 2**31 categories would be very memory-intensive, # so we cheat a bit with the dtype ci = CategoricalIndex(range(32768)) # == 2**16 - 2**(16 - 1) - ci.values._codes = ci.values._codes.astype("int64") + ci.values._ndarray = ci.values._ndarray.astype("int64") assert np.issubdtype(ci.codes.dtype, dtype) assert isinstance(ci._engine, engine_type)