From 6a10fa824944d4e067b7c13ce984470ba5cf1f77 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Thu, 11 Jan 2018 09:45:10 +0700
Subject: [PATCH 1/9] BUG: assign doesn't cast SparseDataFrame to DataFrame

The problem here is that calling assign on a SparseDataFrame should cast
the result to a DataFrame, mainly because SparseDataFrames are a special
case.
---
 doc/source/whatsnew/v0.23.0.txt           | 1 +
 pandas/core/frame.py                      | 4 +++-
 pandas/tests/frame/test_mutate_columns.py | 7 +++++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 92eeed89ada2a..451135a539e2a 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -448,6 +448,7 @@ Reshaping
 - Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
 - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string.  The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`)
 - Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`)
+- Bug in :func:`DataFrame.assign` which doesn't cast ``SparseDataFrame`` as ``DataFrame``. (:issue:`19163`)
 
 Numeric
 ^^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a8c4053850548..326a551cfee24 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2713,7 +2713,9 @@ def assign(self, **kwargs):
         8   9  0.549296  2.197225
         9  10 -0.758542  2.302585
         """
-        data = self.copy()
+
+        # See GH19163
+        data = self.copy().to_dense()
 
         # do all calculations first...
         results = OrderedDict()
diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py
index 9acdf2f17d86a..f1d0284b5eeeb 100644
--- a/pandas/tests/frame/test_mutate_columns.py
+++ b/pandas/tests/frame/test_mutate_columns.py
@@ -55,6 +55,13 @@ def test_assign(self):
         result = df.assign(A=lambda x: x.A + x.B)
         assert_frame_equal(result, expected)
 
+        # SparseDataFrame
+        # See GH 19163
+        result = df.to_sparse(fill_value=False).assign(newcol=False)
+        expected = df.assign(newcol=False)
+        assert type(result) is DataFrame
+        assert_frame_equal(expected, result)
+
     def test_assign_multiple(self):
         df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=['A', 'B'])
         result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B)

From 686ef8e8120dd0fd52bd31b3ed65ecbee1ba1b43 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Fri, 19 Jan 2018 16:09:01 +0700
Subject: [PATCH 2/9] BUG: Fixes problem with SparseArray coercing to float if
 index is passed

---
 pandas/core/frame.py                      |  5 ++---
 pandas/core/sparse/array.py               |  4 ++--
 pandas/tests/frame/test_mutate_columns.py | 11 ++---------
 pandas/tests/sparse/frame/test_frame.py   | 12 ++++++++++++
 pandas/tests/sparse/test_array.py         | 11 +++++++++++
 5 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 326a551cfee24..727666a0f5112 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2649,7 +2649,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
                           allow_duplicates=allow_duplicates)
 
     def assign(self, **kwargs):
-        r"""
+        """
         Assign new columns to a DataFrame, returning a new object
         (a copy) with all the original columns in addition to the new ones.
 
@@ -2714,8 +2714,7 @@ def assign(self, **kwargs):
         9  10 -0.758542  2.302585
         """
 
-        # See GH19163
-        data = self.copy().to_dense()
+        data = self.copy()
 
         # do all calculations first...
         results = OrderedDict()
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 9b2650359bf68..31dbedcf9218d 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -27,7 +27,7 @@
     is_scalar, is_dtype_equal)
 from pandas.core.dtypes.cast import (
     maybe_convert_platform, maybe_promote,
-    astype_nansafe, find_common_type)
+    astype_nansafe, find_common_type, infer_dtype_from)
 from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
 
 import pandas._libs.sparse as splib
@@ -195,7 +195,7 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
                 data = np.nan
             if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
-            values = np.empty(len(index), dtype='float64')
+            values = np.empty(len(index), dtype=infer_dtype_from(data)[0])
             values.fill(data)
             data = values
 
diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py
index f1d0284b5eeeb..25173cf04d9e4 100644
--- a/pandas/tests/frame/test_mutate_columns.py
+++ b/pandas/tests/frame/test_mutate_columns.py
@@ -6,9 +6,9 @@
 import numpy as np
 from pandas.compat import PY36
 
-from pandas import DataFrame, Series, Index, MultiIndex
+from pandas import DataFrame, Series, Index, MultiIndex, SparseSeries
 
-from pandas.util.testing import assert_frame_equal
+from pandas.util.testing import assert_frame_equal, assert_series_equal
 
 import pandas.util.testing as tm
 
@@ -55,13 +55,6 @@ def test_assign(self):
         result = df.assign(A=lambda x: x.A + x.B)
         assert_frame_equal(result, expected)
 
-        # SparseDataFrame
-        # See GH 19163
-        result = df.to_sparse(fill_value=False).assign(newcol=False)
-        expected = df.assign(newcol=False)
-        assert type(result) is DataFrame
-        assert_frame_equal(expected, result)
-
     def test_assign_multiple(self):
         df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=['A', 'B'])
         result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B)
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index 058892e3b85ff..1454c7d1351cf 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -1271,3 +1271,15 @@ def test_quantile_multi(self):
 
         tm.assert_frame_equal(result, dense_expected)
         tm.assert_sp_frame_equal(result, sparse_expected)
+
+    def test_assign_with_sparse_frame(self):
+        # GH 19163
+        df = pd.DataFrame({"a":[1,2,3]})
+        res = df.to_sparse(fill_value=False).assign(newcol=False)
+        exp = df.assign(newcol=False).to_sparse(fill_value=False)
+
+        tm.assert_sp_frame_equal(res, exp)
+
+        for column in res.columns:
+            assert type(res[column]) is SparseSeries
+
diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index 8de93ff320961..c8d2b939c0d34 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -113,6 +113,17 @@ def test_constructor_spindex_dtype(self):
         assert arr.dtype == np.int64
         assert arr.fill_value == 0
 
+    @pytest.mark.parametrize('scalar,dtype', [(False, bool), (0.0, 'float64'), (1, 'int64'), ('z', 'object')])
+    def test_scalar_with_index_infer_dtype(self, scalar, dtype):
+        # GH 19163
+        arr = SparseArray(scalar, index=[1,2,3], fill_value=scalar)
+        exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)
+
+        tm.assert_sp_array_equal(arr, exp)
+
+        assert arr.dtype == dtype
+        assert exp.dtype == dtype
+
     def test_sparseseries_roundtrip(self):
         # GH 13999
         for kind in ['integer', 'block']:

From ac6213af27c87609fd27151763adfc957908de5a Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Fri, 19 Jan 2018 16:14:06 +0700
Subject: [PATCH 3/9] Cleanup

---
 pandas/core/frame.py                      | 2 +-
 pandas/tests/frame/test_mutate_columns.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 202a1e7765b57..47b3292e49dfd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2650,7 +2650,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
                           allow_duplicates=allow_duplicates)
 
     def assign(self, **kwargs):
-        """
+        r"""
         Assign new columns to a DataFrame, returning a new object
         (a copy) with all the original columns in addition to the new ones.
 
diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py
index 25173cf04d9e4..9acdf2f17d86a 100644
--- a/pandas/tests/frame/test_mutate_columns.py
+++ b/pandas/tests/frame/test_mutate_columns.py
@@ -6,9 +6,9 @@
 import numpy as np
 from pandas.compat import PY36
 
-from pandas import DataFrame, Series, Index, MultiIndex, SparseSeries
+from pandas import DataFrame, Series, Index, MultiIndex
 
-from pandas.util.testing import assert_frame_equal, assert_series_equal
+from pandas.util.testing import assert_frame_equal
 
 import pandas.util.testing as tm
 

From 30425685fdeade91e5e620353ff3ee749fcba14b Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Fri, 19 Jan 2018 16:16:35 +0700
Subject: [PATCH 4/9] More cleanup

---
 pandas/tests/sparse/frame/test_frame.py | 3 +--
 pandas/tests/sparse/test_array.py       | 8 ++++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index 1454c7d1351cf..91c3817478782 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -1274,7 +1274,7 @@ def test_quantile_multi(self):
 
     def test_assign_with_sparse_frame(self):
         # GH 19163
-        df = pd.DataFrame({"a":[1,2,3]})
+        df = pd.DataFrame({"a": [1, 2, 3]})
         res = df.to_sparse(fill_value=False).assign(newcol=False)
         exp = df.assign(newcol=False).to_sparse(fill_value=False)
 
@@ -1282,4 +1282,3 @@ def test_assign_with_sparse_frame(self):
 
         for column in res.columns:
             assert type(res[column]) is SparseSeries
-
diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index c8d2b939c0d34..6c0c83cf65ff7 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -113,10 +113,14 @@ def test_constructor_spindex_dtype(self):
         assert arr.dtype == np.int64
         assert arr.fill_value == 0
 
-    @pytest.mark.parametrize('scalar,dtype', [(False, bool), (0.0, 'float64'), (1, 'int64'), ('z', 'object')])
+    @pytest.mark.parametrize('scalar,dtype', [
+        (False, bool),
+        (0.0, 'float64'),
+        (1, 'int64'),
+        ('z', 'object')])
     def test_scalar_with_index_infer_dtype(self, scalar, dtype):
         # GH 19163
-        arr = SparseArray(scalar, index=[1,2,3], fill_value=scalar)
+        arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
         exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)
 
         tm.assert_sp_array_equal(arr, exp)

From d900e11f376f904ecda59de575552d48b60f8485 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Fri, 19 Jan 2018 16:17:41 +0700
Subject: [PATCH 5/9] More cleanup

---
 doc/source/whatsnew/v0.23.0.txt | 2 --
 pandas/core/frame.py            | 1 -
 2 files changed, 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 80020742d1f44..f47b4c8412a0f 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -509,8 +509,6 @@ Reshaping
 - Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`)
 - Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`)
 
->>>>>>> upstream/master
-
 Numeric
 ^^^^^^^
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 47b3292e49dfd..f0919871218f5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2714,7 +2714,6 @@ def assign(self, **kwargs):
         8   9  0.549296  2.197225
         9  10 -0.758542  2.302585
         """
-
         data = self.copy()
 
         # do all calculations first...

From a17a5931b263b0e08a0e3e4e706443816c08d757 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Sat, 20 Jan 2018 07:27:24 +0700
Subject: [PATCH 6/9] Comments from PR Updates

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 pandas/core/sparse/array.py     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index f47b4c8412a0f..3830604ce5f8e 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -491,7 +491,7 @@ Groupby/Resample/Rolling
 Sparse
 ^^^^^^
 
-- Bug in :class:`SparseArray` where if a scalar and index are passed in it will coerce to float64 regardless of scalar's dtype. (:issue:`19163`)
+- Bug in constructing a :class:`SparseArray`: if `data` is a scalar and `index` is defined it will coerce to float64 regardless of scalar's dtype. (:issue:`19163`)
 -
 -
 
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 31dbedcf9218d..7fdf51d0e07aa 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -27,7 +27,7 @@
     is_scalar, is_dtype_equal)
 from pandas.core.dtypes.cast import (
     maybe_convert_platform, maybe_promote,
-    astype_nansafe, find_common_type, infer_dtype_from)
+    astype_nansafe, find_common_type, infer_dtype_from_scalar)
 from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
 
 import pandas._libs.sparse as splib
@@ -195,7 +195,7 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
                 data = np.nan
             if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
-            values = np.empty(len(index), dtype=infer_dtype_from(data)[0])
+            values = np.empty(len(index), dtype=infer_dtype_from_scalar(data)[0])
             values.fill(data)
             data = values
 

From 559434a5d0e4560f39de51d2c48f7929b0bd44b5 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 10:12:03 +0700
Subject: [PATCH 7/9] Fix linting error

---
 pandas/core/sparse/array.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 0ad4a3dcceec7..0134c27209004 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -161,7 +161,8 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
                 data = np.nan
             if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
-            values = np.empty(len(index), dtype=infer_dtype_from_scalar(data)[0])
+            values = np.empty(len(index),
+                              dtype=infer_dtype_from_scalar(data)[0])
             values.fill(data)
             data = values
 

From 16a272d4a59ca998edd7c6243bbd364e7f6d49aa Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Mon, 5 Feb 2018 19:23:04 +0700
Subject: [PATCH 8/9] Update whatsnew entry and use cast function

---
 doc/source/whatsnew/v0.23.0.txt |  2 +-
 pandas/core/sparse/array.py     | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 7f4e75bd90707..09d7acda59021 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -555,7 +555,7 @@ Sparse
 
 - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
 - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
-- Bug in constructing a :class:`SparseArray`: if `data` is a scalar and `index` is defined it will coerce to float64 regardless of scalar's dtype. (:issue:`19163`)
+- Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to float64 regardless of scalar's dtype. (:issue:`19163`)
 -
 
 Reshaping
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 0134c27209004..e3b047c34400f 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -26,7 +26,8 @@
     is_scalar, is_dtype_equal)
 from pandas.core.dtypes.cast import (
     maybe_convert_platform, maybe_promote,
-    astype_nansafe, find_common_type, infer_dtype_from_scalar)
+    astype_nansafe, find_common_type, infer_dtype_from_scalar,
+    construct_1d_arraylike_from_scalar)
 from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
 
 import pandas._libs.sparse as splib
@@ -161,10 +162,9 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
                 data = np.nan
             if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
-            values = np.empty(len(index),
-                              dtype=infer_dtype_from_scalar(data)[0])
-            values.fill(data)
-            data = values
+            dtype = infer_dtype_from_scalar(data)[0]
+            data = construct_1d_arraylike_from_scalar(data, len(index),
+                                                      dtype)
 
         if isinstance(data, ABCSparseSeries):
             data = data.values

From a81796a2d2d5fcc03e6bf3807c73164f2ed5a97a Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 12 Feb 2018 06:39:26 -0500
Subject: [PATCH 9/9] clean

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 pandas/core/sparse/array.py     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 4485e704f5bd1..72f63a4da0f4d 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -822,7 +822,7 @@ Sparse
 - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
 - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
 - Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`)
-- Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to float64 regardless of scalar's dtype. (:issue:`19163`)
+- Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of the scalar's dtype. (:issue:`19163`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index ab0689fbb8fc2..3cbae717d0e07 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -164,8 +164,8 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
             if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
             dtype = infer_dtype_from_scalar(data)[0]
-            data = construct_1d_arraylike_from_scalar(data, len(index),
-                                                      dtype)
+            data = construct_1d_arraylike_from_scalar(
+                data, len(index), dtype)
 
         if isinstance(data, ABCSparseSeries):
             data = data.values