From efbd0e14cd6b35ab1da9bdb22e46cd13738aab4a Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 5 Feb 2021 11:45:23 -0500 Subject: [PATCH 1/8] TST: add OP --- pandas/tests/series/test_dtypes.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index d59f0c05c7462..319631de83b5e 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -94,6 +94,14 @@ def cmp(a, b): result = ser.astype("object").astype(CategoricalDtype()) tm.assert_series_equal(result, roundtrip_expected) + def test_categorical_int_to_int32(self): + # GH 39402 + + df = DataFrame(data={"col1": [2.0, -1.0, 3.0]}) + df.col1 = df.col1.astype("category") + df.col1 = df.col1.astype(np.int32) + assert df.col1.dtype.type is np.int32 + def test_series_to_categorical(self): # see gh-16524: test conversion of Series to Categorical series = Series(["a", "b", "c"]) From c0da63cfc1d486ebcfeaa4ba552ec892219f8804 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 5 Feb 2021 11:45:46 -0500 Subject: [PATCH 2/8] BUG: Categorical.astype --- pandas/core/arrays/categorical.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index af78b84923a9c..c16e29e62cc9b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -466,7 +466,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: else: # GH8628 (PERF): astype category codes instead of astyping array try: - astyped_cats = self.categories.astype(dtype=dtype, copy=copy) + print(dtype) + new_cats = extract_array(self.categories, extract_numpy=True) + new_cats = new_cats.astype(dtype=dtype, copy=copy) except ( TypeError, # downstream error msg for CategoricalIndex is misleading ValueError, @@ -474,8 +476,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: msg = f"Cannot cast 
{self.categories.dtype} dtype to {dtype}" raise ValueError(msg) - astyped_cats = extract_array(astyped_cats, extract_numpy=True) - result = take_1d(astyped_cats, libalgos.ensure_platform_int(self._codes)) + # astyped_cats = extract_array(astyped_cats, extract_numpy=True) + # print(astyped_cats.dtype) + result = take_1d(new_cats, libalgos.ensure_platform_int(self._codes)) return result From 8380e998cde0acac9ea01bb73de55dc6fddebdb8 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 5 Feb 2021 11:46:50 -0500 Subject: [PATCH 3/8] remove debug statements --- pandas/core/arrays/categorical.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c16e29e62cc9b..29e6d8dadc903 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -466,7 +466,6 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: else: # GH8628 (PERF): astype category codes instead of astyping array try: - print(dtype) new_cats = extract_array(self.categories, extract_numpy=True) new_cats = new_cats.astype(dtype=dtype, copy=copy) except ( @@ -476,8 +475,6 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}" raise ValueError(msg) - # astyped_cats = extract_array(astyped_cats, extract_numpy=True) - # print(astyped_cats.dtype) result = take_1d(new_cats, libalgos.ensure_platform_int(self._codes)) return result From eb03c27602c347f2e613813ca269831900e8ab38 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 5 Feb 2021 12:37:48 -0500 Subject: [PATCH 4/8] int64 -> int in Categorical astype tests --- pandas/tests/arrays/categorical/test_dtypes.py | 2 +- pandas/tests/series/test_dtypes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index a2192b2810596..6be9a424a5544 100644 --- 
a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -138,7 +138,7 @@ def test_astype(self, ordered): tm.assert_numpy_array_equal(result, expected) result = cat.astype(int) - expected = np.array(cat, dtype="int64") + expected = np.array(cat, dtype="int") tm.assert_numpy_array_equal(result, expected) result = cat.astype(float) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 319631de83b5e..849553a14b6d4 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -68,7 +68,7 @@ def test_astype_categorical_to_other(self): exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) tm.assert_series_equal(cat.astype("str"), exp) s2 = Series(Categorical(["1", "2", "3", "4"])) - exp2 = Series([1, 2, 3, 4]).astype("int64") + exp2 = Series([1, 2, 3, 4]).astype("int") tm.assert_series_equal(s2.astype("int"), exp2) # object don't sort correctly, so just compare that we have the same From cbfe7933ea335aaaf05b46de3e5031c0468b0974 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 7 Feb 2021 11:54:36 -0500 Subject: [PATCH 5/8] whatsnew --- doc/source/whatsnew/v1.2.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index cc5653fe2f360..27f4af98910aa 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -21,7 +21,7 @@ Fixed regressions - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`) - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) -- +- Fixed 
regression in :meth:`Categorical.astype` casting to incorrect dtype when ``np.int32`` is passed to dtype argument (:issue:`39402`) .. --------------------------------------------------------------------------- From fdf2d5694f7ec343ddc0cb4ffcf007ac2624b449 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 7 Feb 2021 12:57:48 -0500 Subject: [PATCH 6/8] TST: add any_int_or_nullable_int_dtype fixture --- pandas/conftest.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 829ac64884dac..6ec6d66203268 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1242,6 +1242,32 @@ def any_nullable_int_dtype(request): return request.param +@pytest.fixture(params=tm.ALL_INT_DTYPES + tm.ALL_EA_INT_DTYPES) +def any_int_or_nullable_int_dtype(request): + """ + Parameterized fixture for any integer or nullable integer dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + return request.param + + @pytest.fixture(params=tm.ALL_EA_INT_DTYPES + tm.FLOAT_EA_DTYPES) def any_nullable_numeric_dtype(request): """ From 4bd15896896d5746219619cd5fd3b552a8857c1d Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 7 Feb 2021 12:59:11 -0500 Subject: [PATCH 7/8] TST: rename + parametrize test --- pandas/tests/series/test_dtypes.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 849553a14b6d4..d455e434f38be 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -94,13 +94,16 @@ def cmp(a, b): result = ser.astype("object").astype(CategoricalDtype()) tm.assert_series_equal(result, roundtrip_expected) - def test_categorical_int_to_int32(self): + def test_categorical_astype_to_int(self, 
any_int_or_nullable_int_dtype): # GH 39402 - df = DataFrame(data={"col1": [2.0, -1.0, 3.0]}) + df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])}) df.col1 = df.col1.astype("category") - df.col1 = df.col1.astype(np.int32) - assert df.col1.dtype.type is np.int32 + df.col1 = df.col1.astype(any_int_or_nullable_int_dtype) + expected = DataFrame( + {"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)} + ) + tm.assert_frame_equal(df, expected) def test_series_to_categorical(self): # see gh-16524: test conversion of Series to Categorical From e108f0daa8cb879ccb6321bdcca2cc72027c7bca Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 7 Feb 2021 16:36:59 -0500 Subject: [PATCH 8/8] use np.asarray instead of extract_array --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 29e6d8dadc903..13da3df93af14 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -466,7 +466,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: else: # GH8628 (PERF): astype category codes instead of astyping array try: - new_cats = extract_array(self.categories, extract_numpy=True) + new_cats = np.asarray(self.categories) new_cats = new_cats.astype(dtype=dtype, copy=copy) except ( TypeError, # downstream error msg for CategoricalIndex is misleading