diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 2394d4721edfc..62605c6352ba0 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -216,6 +216,7 @@ Indexing Missing ^^^^^^^ - Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) +- Bug in :meth:`Series.to_numpy` raising on a :class:`Series` of ``object`` dtype containing ``pd.NA`` even when a valid ``na_value`` was passed (:issue:`48951`) - MultiIndex diff --git a/pandas/core/base.py b/pandas/core/base.py index 927a3ed6a601c..067b25de92c4d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -537,12 +537,14 @@ def to_numpy( f"to_numpy() got an unexpected keyword argument '{bad_keys}'" ) - result = np.asarray(self._values, dtype=dtype) # TODO(GH-24345): Avoid potential double copy if copy or na_value is not lib.no_default: - result = result.copy() + result = self._values.copy() if na_value is not lib.no_default: result[np.asanyarray(self.isna())] = na_value + result = np.asarray(result, dtype=dtype) + else: + result = np.asarray(self._values, dtype=dtype) return result @property diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f3b7af0ea819d..05ca6c49d3c3a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1745,10 +1745,13 @@ def as_array( dtype=dtype, na_value=na_value, ).reshape(blk.shape) + elif na_value is lib.no_default: + arr = np.asarray(blk.get_values(dtype)) else: - arr = np.asarray(blk.get_values()) - if dtype: - arr = arr.astype(dtype, copy=False) + arr = np.asarray(blk.get_values().copy()) + arr = np.where(~isna(arr), arr, na_value).astype(dtype) + # We've already copied the underlying data + copy = False else: arr = self._interleave(dtype=dtype, na_value=na_value) # The underlying data was copied within _interleave @@ -1815,7 +1818,7 @@ def _interleave( ) else: arr = blk.get_values(dtype) 
- result[rl.indexer] = arr + result[rl.indexer] = np.where(~isna(arr), arr, na_value) itemmask[rl.indexer] = 1 if not itemmask.all(): diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 599aaae4d3527..4a2650708c461 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -398,6 +398,7 @@ def test_to_numpy_dtype(as_series): "values, dtype, na_value, expected", [ ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]), + ([1, 2, pd.NA, 4], "int32", 0, [1, 2, 0, 4]), ( [Timestamp("2000"), Timestamp("2000"), pd.NaT], None, @@ -411,7 +412,7 @@ def test_to_numpy_na_value_numpy_dtype( ): obj = index_or_series(values) result = obj.to_numpy(dtype=dtype, na_value=na_value) - expected = np.array(expected) + expected = np.array(expected, dtype=dtype) tm.assert_numpy_array_equal(result, expected) @@ -477,6 +478,7 @@ def test_to_numpy_kwargs_raises(): {"a": [1, 2, 3], "b": [1, 2, None]}, {"a": np.array([1, 2, 3]), "b": np.array([1, 2, np.nan])}, {"a": pd.array([1, 2, 3]), "b": pd.array([1, 2, None])}, + {"a": np.array([1, 2, 3]), "b": np.array([1, 2, pd.NA])}, ], ) @pytest.mark.parametrize("dtype, na_value", [(float, np.nan), (object, None)]) @@ -489,22 +491,34 @@ def test_to_numpy_dataframe_na_value(data, dtype, na_value): @pytest.mark.parametrize( - "data, expected", + "data, dtype, expected", [ ( {"a": pd.array([1, 2, None])}, + float, np.array([[1.0], [2.0], [np.nan]], dtype=float), ), + ( + {"a": np.array([1, 2, pd.NA])}, + float, + np.array([[1.0], [2.0], [np.nan]]), + ), + ( + {"a": np.array(["a", "b", pd.NA])}, + object, + np.array([["a"], ["b"], [np.nan]], dtype=object), + ), ( {"a": [1, 2, 3], "b": [1, 2, 3]}, + float, np.array([[1, 1], [2, 2], [3, 3]], dtype=float), ), ], ) -def test_to_numpy_dataframe_single_block(data, expected): +def test_to_numpy_dataframe_single_block(data, dtype, expected): # https://github.com/pandas-dev/pandas/issues/33820 df = pd.DataFrame(data) - result = 
df.to_numpy(dtype=float, na_value=np.nan) + result = df.to_numpy(dtype=dtype, na_value=np.nan) tm.assert_numpy_array_equal(result, expected)