diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 7838ef8df4164..c908825fd5da6 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -790,7 +790,7 @@ Reshaping
 
 Sparse
 ^^^^^^
-- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`)
+- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`, :issue:`50087`)
 - Bug in :meth:`Series.astype` when converting a from ``datetime64[ns]`` to ``Sparse[datetime64[ns]]`` incorrectly raising (:issue:`50082`)
 -
 
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 033917fe9eb2d..2a6e26fbdbd1c 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -49,10 +49,7 @@
     validate_insert_loc,
 )
 
-from pandas.core.dtypes.astype import (
-    astype_array,
-    astype_nansafe,
-)
+from pandas.core.dtypes.astype import astype_array
 from pandas.core.dtypes.cast import (
     construct_1d_arraylike_from_scalar,
     find_common_type,
@@ -445,7 +442,7 @@ def __init__(
                 # NumPy may raise a ValueError on data like [1, []]
                 # we retry with object dtype here.
                 if dtype is None:
-                    dtype = object
+                    dtype = np.dtype(object)
                     data = np.atleast_1d(np.asarray(data, dtype=dtype))
                 else:
                     raise
@@ -464,10 +461,7 @@ def __init__(
         if isinstance(data, type(self)) and sparse_index is None:
             sparse_index = data._sparse_index
             # error: Argument "dtype" to "asarray" has incompatible type
-            # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected
-            # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
-            # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
-            # Any]]]"
+            # "Union[ExtensionDtype, dtype[Any], None]"; expected "None"
             sparse_values = np.asarray(
                 data.sp_values, dtype=dtype  # type: ignore[arg-type]
             )
@@ -487,10 +481,10 @@ def __init__(
                     if fill_value is NaT:
                         fill_value = np.datetime64("NaT", "ns")
             data = np.asarray(data)
-            sparse_values, sparse_index, fill_value = make_sparse(
-                # error: Argument "dtype" to "make_sparse" has incompatible type
-                # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected
-                # "Union[str, dtype[Any], None]"
+            sparse_values, sparse_index, fill_value = _make_sparse(
+                # error: Argument "dtype" to "_make_sparse" has incompatible type
+                # "Union[ExtensionDtype, dtype[Any], None]"; expected
+                # "Optional[dtype[Any]]"
                 data,
                 kind=kind,
                 fill_value=fill_value,
@@ -498,10 +492,7 @@ def __init__(
             )
         else:
             # error: Argument "dtype" to "asarray" has incompatible type
-            # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected
-            # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int],
-            # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
-            # Any]]]"
+            # "Union[ExtensionDtype, dtype[Any], None]"; expected "None"
             sparse_values = np.asarray(data, dtype=dtype)  # type: ignore[arg-type]
             if len(sparse_values) != sparse_index.npoints:
                 raise AssertionError(
@@ -1288,7 +1279,9 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
         dtype = self.dtype.update_dtype(dtype)
         subtype = pandas_dtype(dtype._subtype_with_str)
         subtype = cast(np.dtype, subtype)  # ensured by update_dtype
-        sp_values = astype_nansafe(self.sp_values, subtype, copy=copy)
+        values = ensure_wrapped_if_datetimelike(self.sp_values)
+        sp_values = astype_array(values, subtype, copy=copy)
+        sp_values = np.asarray(sp_values)
 
         return self._simple_new(sp_values, self.sp_index, dtype)
 
@@ -1828,11 +1821,11 @@ def _formatter(self, boxed: bool = False):
         return None
 
 
-def make_sparse(
+def _make_sparse(
     arr: np.ndarray,
     kind: SparseIndexKind = "block",
     fill_value=None,
-    dtype: NpDtype | None = None,
+    dtype: np.dtype | None = None,
 ):
     """
     Convert ndarray to sparse format
@@ -1882,7 +1875,10 @@ def make_sparse(
     index = make_sparse_index(length, indices, kind)
     sparsified_values = arr[mask]
     if dtype is not None:
-        sparsified_values = astype_nansafe(sparsified_values, dtype=pandas_dtype(dtype))
+        sparsified_values = ensure_wrapped_if_datetimelike(sparsified_values)
+        sparsified_values = astype_array(sparsified_values, dtype=dtype)
+        sparsified_values = np.asarray(sparsified_values)
+    # TODO: copy
     return sparsified_values, index, fill_value
 
 
diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py
index f9a0ac6d38b6d..1b48e90538e8e 100644
--- a/pandas/core/arrays/sparse/dtype.py
+++ b/pandas/core/arrays/sparse/dtype.py
@@ -127,7 +127,15 @@ def __eq__(self, other: Any) -> bool:
                     or isinstance(other.fill_value, type(self.fill_value))
                 )
             else:
-                fill_value = self.fill_value == other.fill_value
+                with warnings.catch_warnings():
+                    # Ignore spurious numpy warning
+                    warnings.filterwarnings(
+                        "ignore",
+                        "elementwise comparison failed",
+                        category=DeprecationWarning,
+                    )
+
+                    fill_value = self.fill_value == other.fill_value
 
             return subtype and fill_value
         return False
diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py
index 924f7a56e806a..d729a31668ade 100644
--- a/pandas/tests/arrays/sparse/test_astype.py
+++ b/pandas/tests/arrays/sparse/test_astype.py
@@ -119,3 +119,15 @@ def test_astype_dt64_to_int64(self):
         result = arr.astype("int64")
         expected = values.astype("int64")
         tm.assert_numpy_array_equal(result, expected)
+
+        # we should also be able to cast to an equivalent Sparse[int64]
+        dtype_int64 = SparseDtype("int64", np.iinfo(np.int64).min)
+        result2 = arr.astype(dtype_int64)
+        tm.assert_numpy_array_equal(result2.to_numpy(), expected)
+
+        # GH#50087 we should match the non-sparse behavior regardless of
+        # whether we have a fill_value other than NaT
+        dtype = SparseDtype("datetime64[ns]", values[1])
+        arr3 = SparseArray(values, dtype=dtype)
+        result3 = arr3.astype("int64")
+        tm.assert_numpy_array_equal(result3, expected)
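
Not part of the patch itself: a minimal sketch of the behavior the updated test asserts, assuming a pandas build with this change applied. The `values` array and variable names below are illustrative stand-ins for the fixture defined earlier in `test_astype.py`.

```python
# Illustrative sketch only -- assumes pandas with this patch applied.
import numpy as np

from pandas import SparseDtype
from pandas.arrays import SparseArray

# Stand-in data: datetime64[ns] values containing a NaT.
values = np.array(["NaT", "2016-01-02", "2016-01-03"], dtype="datetime64[ns]")
expected = values.astype("int64")  # the non-sparse result we want to match

# Existing behavior re-asserted by the test: casting the sparse datetime
# values to int64 matches the dense result instead of raising (GH#49631).
arr = SparseArray(values)
np.testing.assert_array_equal(arr.astype("int64"), expected)

# New assertion: casting to an equivalent Sparse[int64] also works, and
# densifying gives the same values.
dtype_int64 = SparseDtype("int64", np.iinfo(np.int64).min)
np.testing.assert_array_equal(arr.astype(dtype_int64).to_numpy(), expected)

# GH#50087: the same holds with a fill_value other than NaT.
arr2 = SparseArray(values, dtype=SparseDtype("datetime64[ns]", values[1]))
np.testing.assert_array_equal(arr2.astype("int64"), expected)
```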