BUG: DataFrame.sparse.from_spmatrix hard codes an invalid fill_value for certain subtypes #59064

Merged

Changes from 9 commits

2 changes: 1 addition & 1 deletion doc/source/user_guide/sparse.rst
@@ -188,7 +188,7 @@ Use :meth:`DataFrame.sparse.from_spmatrix` to create a :class:`DataFrame` with s
sp_arr = csr_matrix(arr)
sp_arr

sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr)
sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr, fill_value=0)
sdf.head()
sdf.dtypes

2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -584,7 +584,7 @@ Reshaping
Sparse
^^^^^^
- Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
-
- Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`)

ExtensionArray
^^^^^^^^^^^^^^
30 changes: 24 additions & 6 deletions pandas/core/arrays/sparse/accessor.py
@@ -265,7 +265,9 @@ def _validate(self, data) -> None:
raise AttributeError(self._validation_msg)

@classmethod
def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
def from_spmatrix(
cls, data, index=None, columns=None, fill_value=None
) -> DataFrame:
"""
Create a new DataFrame from a scipy sparse matrix.

@@ -276,6 +278,22 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
index, columns : Index, optional
Row and column labels to use for the resulting DataFrame.
Defaults to a RangeIndex.
fill_value : scalar, optional
The scalar value not stored in the columns. By default, this
depends on the dtype of ``data``.

=========== ==========
dtype na_value
=========== ==========
float ``np.nan``
complex ``np.nan``
int ``0``
bool ``False``
datetime64 ``pd.NaT``
timedelta64 ``pd.NaT``
=========== ==========

The default value may be overridden by specifying a ``fill_value``.

Returns
-------
@@ -292,11 +310,11 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
--------
>>> import scipy.sparse
>>> mat = scipy.sparse.eye(3, dtype=float)
>>> pd.DataFrame.sparse.from_spmatrix(mat)
>>> pd.DataFrame.sparse.from_spmatrix(mat, fill_value=0.0)
0 1 2
0 1.0 0 0
1 0 1.0 0
2 0 0 1.0
0 1.0 0.0 0.0
1 0.0 1.0 0.0
2 0.0 0.0 1.0
"""
from pandas._libs.sparse import IntIndex

@@ -313,7 +331,7 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
indices = data.indices
indptr = data.indptr
array_data = data.data
dtype = SparseDtype(array_data.dtype, 0)
dtype = SparseDtype(array_data.dtype, fill_value)

Member

Can you use na_value_for_dtype instead of introducing a new argument?

Contributor Author

Hi Matthew. Thank you for your speedy response and taking the time to review my PR.

na_value_for_dtype is already called indirectly in this implementation: when fill_value is left at its default of None and passed to the SparseDtype constructor, the constructor falls back to it:

if fill_value is None:
fill_value = na_value_for_dtype(dtype)
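
For illustration, a minimal sketch of that fallback (the dtypes are chosen to match the defaults table documented for SparseDtype):

    import pandas as pd

    # With no explicit fill_value, SparseDtype falls back to na_value_for_dtype.
    print(pd.SparseDtype("float64").fill_value)         # nan
    print(pd.SparseDtype("int64").fill_value)           # 0
    print(pd.SparseDtype("bool").fill_value)            # False
    print(pd.SparseDtype("datetime64[ns]").fill_value)  # NaT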

A fill_value parameter is typically only needed when constructing a sparse-format object from a dense one, since we need to identify the non-zero elements in the data to correctly set attributes such as the data, index, and index pointer (for sparse array formats like BSR, CSR, and CSC). In this case we are simply reading those attributes from a CSC matrix, so you are right that a fill_value parameter is not strictly required to solve the bug.

However, in addition to fixing the bug without adding overhead, a fill_value parameter gives the user flexibility in certain use cases: for example, converting a SparseArray to a np.ndarray with np.asarray uses fill_value to populate the missing elements. This is along the lines of np.ma.core.MaskedArray which, albeit not a sparse implementation, has a filled method that uses a custom fill_value to convert to a np.ndarray.
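
As a minimal illustration of that analogy (the -1.0 fill value here is purely for demonstration):

    import numpy as np
    import pandas as pd

    # A SparseArray that treats -1.0 as the "not stored" value.
    arr = pd.arrays.SparseArray([1.0, -1.0, -1.0, 2.0], fill_value=-1.0)
    np.asarray(arr)  # array([ 1., -1., -1.,  2.]) -- fill_value populates the gaps

    # The MaskedArray counterpart mentioned above:
    masked = np.ma.MaskedArray([1.0, 0.0, 0.0, 2.0], mask=[False, True, True, False])
    masked.filled(-1.0)  # array([ 1., -1., -1.,  2.])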

Would you be OK with me keeping it?

The current test failures also seem unrelated to the PR and have been around since #59027.

Member

The core maintainers have been cautious about expanding the API's surface area and signatures unless necessary or for consistency reasons. I would prefer that this bug be solved without adding a new keyword argument first; you could then open a new issue about adding a new keyword here that has opt-in from more core team members.

Contributor Author

I understand and yes, the cautious approach makes total sense given the size of pandas.

The issues that I see arising as a result of removing the fill_value parameter and implementing your change are:

  • DataFrame.sparse.from_spmatrix().sparse.to_coo, shown as an example in the user guide, will break for float and complex subtypes because it will raise a ValueError. The original motivation for the check seems to have been that a custom fill_value is lost when converting to a COO matrix: scipy.sparse._coo.coo_matrix and the other sparse formats have no analogous attribute and instead use False, 0, 0., and 0. + 0.j when returning a dense representation as a np.ndarray or np.matrix, which was considered unexpected behaviour at the time. 🤷 We can remove the check because the constructor called in to_coo uses the ijv format rather than instantiating directly from a single 2-D np.ndarray, so the returned COO matrix will be correct regardless of the fill_value (see the sketch after this list). The affected user-guide example is:

        sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr)
        sdf.head()
        sdf.dtypes

        All sparse formats are supported, but matrices that are not in :mod:`COOrdinate <scipy.sparse>` format will be converted, copying data as needed.
        To convert back to sparse SciPy matrix in COO format, you can use the :meth:`DataFrame.sparse.to_coo` method:

        .. ipython:: python

           sdf.sparse.to_coo()

    and the check in to_coo that would be removed is:

        if sp_arr.fill_value != 0:
            raise ValueError("fill value must be 0 when converting to COO matrix")
  • All of the tests that rely on a DataFrame.sparse.from_spmatrix invocation, apart from the one I added to test the changes (test_from_spmatrix_fill_value), assume a fill_value of 0. Their expectations will all have to be changed and will then depend on na_value_for_dtype for the fill_value to use, and the docstring example will have to change.
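
A rough sketch of the ijv point above (not pandas' actual to_coo implementation): only the stored values and their positions are passed to the COO constructor, so no fill_value is ever materialised into a dense array.

    import numpy as np
    import scipy.sparse

    # ijv (row, col, value) triplets describing only the stored entries.
    rows = np.array([0, 1, 2])
    cols = np.array([0, 1, 2])
    data = np.array([1.0, 1.0, 1.0])

    coo = scipy.sparse.coo_matrix((data, (rows, cols)), shape=(3, 3))
    coo.toarray()
    # array([[1., 0., 0.],
    #        [0., 1., 0.],
    #        [0., 0., 1.]])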

Would you like me to make the changes above, close the PR, or keep the fill_value parameter as it is currently implemented in my branch?

Member

I think the changes above would be appropriate.

Contributor Author

Excellent. It should be rather straightforward and quick in that case! I will make the changes now.

arrays = []
for i in range(n_columns):
sl = slice(indptr[i], indptr[i + 1])
7 changes: 4 additions & 3 deletions pandas/core/dtypes/dtypes.py
@@ -1666,7 +1666,7 @@ class SparseDtype(ExtensionDtype):
"""
Dtype for data stored in :class:`SparseArray`.

`SparseDtype` is used as the data type for :class:`SparseArray`, enabling
SparseDtype is used as the data type for :class:`SparseArray`, enabling
more efficient storage of data that contains a significant number of
repetitive values typically represented by a fill value. It supports any
scalar dtype as the underlying data type of the non-fill values.
@@ -1677,19 +1677,20 @@ class SparseDtype(ExtensionDtype):
The dtype of the underlying array storing the non-fill value values.
fill_value : scalar, optional
The scalar value not stored in the SparseArray. By default, this
depends on `dtype`.
depends on ``dtype``.

=========== ==========
dtype na_value
=========== ==========
float ``np.nan``
complex ``np.nan``
int ``0``
bool ``False``
datetime64 ``pd.NaT``
timedelta64 ``pd.NaT``
=========== ==========

The default value may be overridden by specifying a `fill_value`.
The default value may be overridden by specifying a ``fill_value``.

Attributes
----------
4 changes: 3 additions & 1 deletion pandas/core/dtypes/missing.py
@@ -618,6 +618,8 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
nan
>>> na_value_for_dtype(np.dtype("float64"))
nan
>>> na_value_for_dtype(np.dtype("complex128"))
nan
>>> na_value_for_dtype(np.dtype("bool"))
False
>>> na_value_for_dtype(np.dtype("datetime64[ns]"))
@@ -629,7 +631,7 @@
elif dtype.kind in "mM":
unit = np.datetime_data(dtype)[0]
return dtype.type("NaT", unit)
elif dtype.kind == "f":
elif dtype.kind in "fc":
return np.nan
elif dtype.kind in "iu":
if compat:
28 changes: 24 additions & 4 deletions pandas/tests/arrays/sparse/test_accessor.py
@@ -105,14 +105,16 @@ def test_accessor_raises(self):

@pytest.mark.parametrize("format", ["csc", "csr", "coo"])
@pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
@pytest.mark.parametrize("dtype", ["float64", "int64"])
@pytest.mark.parametrize("dtype", ["complex128", "float64", "int64"])
def test_from_spmatrix(self, format, labels, dtype):
sp_sparse = pytest.importorskip("scipy.sparse")

sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item())

mat = sp_sparse.eye(10, format=format, dtype=dtype)
result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)
result = pd.DataFrame.sparse.from_spmatrix(
mat, index=labels, columns=labels, fill_value=0
)
expected = pd.DataFrame(
np.eye(10, dtype=dtype), index=labels, columns=labels
).astype(sp_dtype)
@@ -124,7 +126,7 @@ def test_from_spmatrix_including_explicit_zero(self, format):

mat = sp_sparse.random(10, 2, density=0.5, format=format)
mat.data[0] = 0
result = pd.DataFrame.sparse.from_spmatrix(mat)
result = pd.DataFrame.sparse.from_spmatrix(mat, fill_value=0)
dtype = SparseDtype("float64", 0.0)
expected = pd.DataFrame(mat.todense()).astype(dtype)
tm.assert_frame_equal(result, expected)
@@ -139,10 +141,28 @@ def test_from_spmatrix_columns(self, columns):
dtype = SparseDtype("float64", 0.0)

mat = sp_sparse.random(10, 2, density=0.5)
result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)
result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns, fill_value=0)
expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"dtype, fill_value",
[("bool", False), ("float64", np.nan), ("complex128", np.nan)],
)
@pytest.mark.parametrize("format", ["csc", "csr", "coo"])
def test_from_spmatrix_fill_value(self, format, dtype, fill_value):
sp_sparse = pytest.importorskip("scipy.sparse")

sp_dtype = SparseDtype(dtype, fill_value)

sp_mat = sp_sparse.eye(10, format=format, dtype=dtype)
result = pd.DataFrame.sparse.from_spmatrix(sp_mat, fill_value=fill_value)
mat = np.eye(10, dtype=dtype)
expected = pd.DataFrame(
np.ma.array(mat, mask=(mat == 0)).filled(fill_value)
).astype(sp_dtype)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
)
3 changes: 3 additions & 0 deletions pandas/tests/dtypes/test_missing.py
@@ -697,6 +697,9 @@ def test_array_equivalent_index_with_tuples():
("f2", np.nan),
("f4", np.nan),
("f8", np.nan),
# Complex
("c8", np.nan),
("c16", np.nan),
# Object
("O", np.nan),
# Interval
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_loc.py
@@ -1292,7 +1292,7 @@ def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
# diagonal cells are ones, meaning the last two columns are purely sparse.
rows, cols = 5, 7
spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
df = DataFrame.sparse.from_spmatrix(spmatrix)
df = DataFrame.sparse.from_spmatrix(spmatrix, fill_value=0)

# regression test for GH#34526
itr_idx = range(2, rows)
Expand All @@ -1314,7 +1314,7 @@ def test_loc_getitem_sparse_frame(self):
# GH34687
sp_sparse = pytest.importorskip("scipy.sparse")

df = DataFrame.sparse.from_spmatrix(sp_sparse.eye(5))
df = DataFrame.sparse.from_spmatrix(sp_sparse.eye(5), fill_value=0)
result = df.loc[range(2)]
expected = DataFrame(
[[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]],