From ba8dc2921cf1952da6a27e20a663b40eee7c1bd7 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Sat, 23 Dec 2017 18:38:53 -0800 Subject: [PATCH 1/8] BUG: fix issue with sparse concatting This was originally brought up in :issue:`18686` and :issue:`18914`. Basically the problem is when you use get_dummies with sparse=True it will return a SparseDataFrame with sparse and dense columns. This is in fact not what we want. What we want is a DataFrame with sparse and dense columns. Inside of pandas.core.dtypes.concat is a function that defines the factory class which needed to be changed. --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/dtypes/concat.py | 4 +-- pandas/core/sparse/series.py | 1 - pandas/tests/reshape/test_reshape.py | 9 +++++++ pandas/tests/sparse/test_combine_concat.py | 30 ++++++++++------------ 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 92eeed89ada2a..2a18904a88a2f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -448,6 +448,7 @@ Reshaping - Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) - Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) +- Bug in :func:`concat` when concatting sparse and dense series it returns only a SparseDataFrame. Should be a DataFrame. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) Numeric ^^^^^^^ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5e6193d673756..080a1e6233197 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -89,10 +89,10 @@ def _get_series_result_type(result, objs=None): def _get_frame_result_type(result, objs): """ return appropriate class of DataFrame-like concat - if any block is SparseBlock, return SparseDataFrame + if all blocks are SparseBlock, return SparseDataFrame otherwise, return 1st obj """ - if any(b.is_sparse for b in result.blocks): + if result.blocks and all(b.is_sparse for b in result.blocks): from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame else: diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index b5d2c0b607444..e6393a64ecc79 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -168,7 +168,6 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', if index is None: index = data.index.view() else: - data = data.reindex(index, copy=False) else: diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 22925cceb30d1..c9d079421532f 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -454,6 +454,15 @@ def test_dataframe_dummies_preserve_categorical_dtype(self, dtype): tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize('sparse', [True, False]) + def test_get_dummies_dont_sparsify_all_columns(self, sparse): + # GH18914 + df = DataFrame.from_items([('GDP', [1, 2]), ('Nation', ['AB', 'CD'])]) + df = get_dummies(df, columns=['Nation'], sparse=sparse) + df2 = df.reindex(columns=['GDP']) + + tm.assert_frame_equal(df[['GDP']], df2) + class TestCategoricalReshape(object): diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index 15639fbe156c6..4579b94a83496 100644 --- 
a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -1,4 +1,5 @@ # pylint: disable-msg=E1101,W0612 +import pytest import numpy as np import pandas as pd @@ -317,37 +318,34 @@ def test_concat_axis1(self): assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) - def test_concat_sparse_dense(self): - sparse = self.dense1.to_sparse() - + @pytest.mark.parametrize('fill_value', [None, 0]) + def test_concat_sparse_dense(self, fill_value): + sparse = self.dense1.to_sparse(fill_value=fill_value) res = pd.concat([sparse, self.dense2]) exp = pd.concat([self.dense1, self.dense2]) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense2, sparse]) - exp = pd.concat([self.dense2, self.dense1]) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - sparse = self.dense1.to_sparse(fill_value=0) - res = pd.concat([sparse, self.dense2]) - exp = pd.concat([self.dense1, self.dense2]) assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) res = pd.concat([self.dense2, sparse]) exp = pd.concat([self.dense2, self.dense1]) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) res = pd.concat([self.dense3, sparse], axis=1) exp = pd.concat([self.dense3, self.dense1], axis=1) - assert isinstance(res, pd.SparseDataFrame) + # See GH18914 and #18686 for why this should be + # A DataFrame + assert isinstance(res, pd.DataFrame) + for column in self.dense3.columns: + tm.assert_series_equal(res[column], exp[column]) + tm.assert_frame_equal(res, exp) res = pd.concat([sparse, self.dense3], axis=1) exp = pd.concat([self.dense1, self.dense3], axis=1) - assert isinstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.DataFrame) + for column in self.dense3.columns: + tm.assert_series_equal(res[column], exp[column]) tm.assert_frame_equal(res, exp) From 
05a0717f886e3f6963cff422f2712004482d5012 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Fri, 5 Jan 2018 08:21:43 +0700 Subject: [PATCH 2/8] Add tests for 16874 --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/tests/sparse/test_combine_concat.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 2a18904a88a2f..70dee7b018353 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -448,7 +448,7 @@ Reshaping - Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) - Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) -- Bug in :func:`concat` when concatting sparse and dense series it returns only a SparseDataFrame. Should be a DataFrame. (:issue:`18914`, :issue:`18686`, and :issue:`16874`) +- Bug in :func:`concat` where concatenating sparse and dense series returned a ``SparseDataFrame`` instead of a ``DataFrame`` 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) Numeric ^^^^^^^ diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index 4579b94a83496..4f4176b261312 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -338,6 +338,10 @@ def test_concat_sparse_dense(self, fill_value): # See GH18914 and #18686 for why this should be # A DataFrame assert isinstance(res, pd.DataFrame) + # See GH16874 + assert res.isnull() + assert res[res.columns[0]] + assert res.iloc[0,0] for column in self.dense3.columns: tm.assert_series_equal(res[column], exp[column]) @@ -346,6 +350,10 @@ def test_concat_sparse_dense(self, fill_value): res = pd.concat([sparse, self.dense3], axis=1) exp = pd.concat([self.dense1, self.dense3], axis=1) assert isinstance(res, pd.DataFrame) + # See GH16874 + assert res.isnull() + assert res[res.columns[0]] + assert res.iloc[0,0] for column in self.dense3.columns: tm.assert_series_equal(res[column], exp[column]) tm.assert_frame_equal(res, exp) From 6fc636975e6dd9131d54c5116bc21d8bc27825c6 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Tue, 9 Jan 2018 11:58:34 +0700 Subject: [PATCH 3/8] Add more specific tests --- pandas/core/dtypes/concat.py | 6 ++++-- pandas/tests/sparse/test_combine_concat.py | 17 +++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 080a1e6233197..8e9a6bb2f2b99 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -92,11 +92,13 @@ def _get_frame_result_type(result, objs): if all blocks are SparseBlock, return SparseDataFrame otherwise, return 1st obj """ + + from pandas.core.sparse.api import SparseDataFrame + if result.blocks and all(b.is_sparse for b in result.blocks): - from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame else: - return objs[0] + return next(obj for obj in objs if not type(obj) == 
SparseDataFrame) def _concat_compat(to_concat, axis=0): diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index 4f4176b261312..e16de3fce9ea9 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -337,11 +337,12 @@ def test_concat_sparse_dense(self, fill_value): exp = pd.concat([self.dense3, self.dense1], axis=1) # See GH18914 and #18686 for why this should be # A DataFrame - assert isinstance(res, pd.DataFrame) + assert type(res) is pd.DataFrame # See GH16874 - assert res.isnull() - assert res[res.columns[0]] - assert res.iloc[0,0] + assert not res.isnull().empty + assert not res[res.columns[0]].empty + assert res.iloc[0,0] == self.dense3.iloc[0,0] + for column in self.dense3.columns: tm.assert_series_equal(res[column], exp[column]) @@ -349,11 +350,11 @@ def test_concat_sparse_dense(self, fill_value): res = pd.concat([sparse, self.dense3], axis=1) exp = pd.concat([self.dense1, self.dense3], axis=1) - assert isinstance(res, pd.DataFrame) + assert type(res) is pd.DataFrame # See GH16874 - assert res.isnull() - assert res[res.columns[0]] - assert res.iloc[0,0] + assert not res.isnull().empty + assert not res[res.columns[0]].empty + assert res.iloc[0,0] == sparse.iloc[0,0] for column in self.dense3.columns: tm.assert_series_equal(res[column], exp[column]) tm.assert_frame_equal(res, exp) From 3c0a4daf2eb21920bf5893d42b4da5e47f3beaff Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 31 Jan 2018 06:42:54 +0700 Subject: [PATCH 4/8] FIX: fix linting failure with testing --- pandas/tests/sparse/test_combine_concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index e16de3fce9ea9..c470c8b2da489 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -341,7 +341,7 @@ def test_concat_sparse_dense(self, 
fill_value): # See GH16874 assert not res.isnull().empty assert not res[res.columns[0]].empty - assert res.iloc[0,0] == self.dense3.iloc[0,0] + assert res.iloc[0, 0] == self.dense3.iloc[0, 0] for column in self.dense3.columns: tm.assert_series_equal(res[column], exp[column]) @@ -354,7 +354,7 @@ def test_concat_sparse_dense(self, fill_value): # See GH16874 assert not res.isnull().empty assert not res[res.columns[0]].empty - assert res.iloc[0,0] == sparse.iloc[0,0] + assert res.iloc[0, 0] == sparse.iloc[0, 0] for column in self.dense3.columns: tm.assert_series_equal(res[column], exp[column]) tm.assert_frame_equal(res, exp) From 126db414791721d2297a6419bf0f7eab5ff12a18 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 31 Jan 2018 10:10:46 +0700 Subject: [PATCH 5/8] FIX: use ABCSparseDataFrame and cleanup test a bit --- pandas/core/dtypes/concat.py | 4 +- pandas/core/dtypes/generic.py | 1 + pandas/tests/sparse/test_combine_concat.py | 86 ++++++++++++---------- 3 files changed, 49 insertions(+), 42 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5ae3e2821e8ec..4cd1e86d9a5b2 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -19,7 +19,7 @@ _TD_DTYPE) from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex, ABCRangeIndex) + ABCPeriodIndex, ABCRangeIndex, ABCSparseDataFrame) def get_dtype_kinds(l): @@ -98,7 +98,7 @@ def _get_frame_result_type(result, objs): if result.blocks and all(b.is_sparse for b in result.blocks): return SparseDataFrame else: - return next(obj for obj in objs if not type(obj) == SparseDataFrame) + return next(obj for obj in objs if not isinstance(obj, ABCSparseDataFrame)) def _concat_compat(to_concat, axis=0): diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 6fae09c43d2be..794330cfe0281 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -43,6 +43,7 @@ def _check(cls, inst): 
ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", )) ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", )) +ABCSparseDataFrame = create_pandas_abc_type("ABCSparseDataFrame", "_subtyp", ("sparse_frame", )) ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",)) ABCSparseSeries = create_pandas_abc_type("ABCSparseSeries", "_subtyp", ('sparse_series', diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index c470c8b2da489..57f9b5f52de21 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -5,6 +5,7 @@ import pandas as pd import pandas.util.testing as tm +import itertools class TestSparseSeriesConcat(object): @@ -318,43 +319,48 @@ def test_concat_axis1(self): assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) - @pytest.mark.parametrize('fill_value', [None, 0]) - def test_concat_sparse_dense(self, fill_value): - sparse = self.dense1.to_sparse(fill_value=fill_value) - res = pd.concat([sparse, self.dense2]) - exp = pd.concat([self.dense1, self.dense2]) - - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense2, sparse]) - exp = pd.concat([self.dense2, self.dense1]) - - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense3, sparse], axis=1) - exp = pd.concat([self.dense3, self.dense1], axis=1) - # See GH18914 and #18686 for why this should be - # A DataFrame - assert type(res) is pd.DataFrame - # See GH16874 - assert not res.isnull().empty - assert not res[res.columns[0]].empty - assert res.iloc[0, 0] == self.dense3.iloc[0, 0] - - for column in self.dense3.columns: - tm.assert_series_equal(res[column], exp[column]) - - tm.assert_frame_equal(res, exp) - - res = pd.concat([sparse, self.dense3], axis=1) - exp = pd.concat([self.dense1, self.dense3], axis=1) - 
assert type(res) is pd.DataFrame - # See GH16874 - assert not res.isnull().empty - assert not res[res.columns[0]].empty - assert res.iloc[0, 0] == sparse.iloc[0, 0] - for column in self.dense3.columns: - tm.assert_series_equal(res[column], exp[column]) - tm.assert_frame_equal(res, exp) + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', itertools.product([None, 0, 1, np.nan], [0,1], [1,0])) + def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): + frames = [self.dense1, self.dense2] + sparse_frame = [frames[dense_idx], frames[sparse_idx].to_sparse(fill_value=fill_value)] + dense_frame = [frames[dense_idx], frames[sparse_idx]] + + for _ in range(2): + res = pd.concat(sparse_frame) + exp = pd.concat(dense_frame) + + assert isinstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + sparse_frame = sparse_frame[::-1] + dense_frame = dense_frame[::-1] + + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', itertools.product([None, 0, 1, np.nan], [0,1], [1,0])) + def test_concat_sparse_dense_columns(self, fill_value, sparse_idx, dense_idx): + dense_frames = [self.dense1, self.dense3] + + sparse_frame = [dense_frames[dense_idx], dense_frames[sparse_idx].to_sparse(fill_value=fill_value)] + dense_frame = [dense_frames[dense_idx], dense_frames[sparse_idx]] + + for _ in range(2): + res = pd.concat(sparse_frame, axis=1) + exp = pd.concat(dense_frame, axis=1) + + # See GH18914 and #18686 for why this should be + # A DataFrame + assert type(res) is pd.DataFrame + # See GH16874 + assert not res.isnull().empty + assert not res[res.columns[0]].empty + assert res.iloc[0, 0] == exp.iloc[0, 0] + + for column in dense_frames[dense_idx].columns: + if dense_idx == sparse_idx: + tm.assert_frame_equal(res[column], exp[column]) + else: + tm.assert_series_equal(res[column], exp[column]) + + tm.assert_frame_equal(res, exp) + + sparse_frame = sparse_frame[::-1] + dense_frame = dense_frame[::-1] From 
6d01387f0257a566e4846f1a8d0130ae0f030c7f Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 31 Jan 2018 10:14:29 +0700 Subject: [PATCH 6/8] Add test for ABCSparseDataFrame --- pandas/tests/dtypes/test_generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 58cb182e7d403..53f92b98f022e 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -18,6 +18,7 @@ class TestABCClasses(object): df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index) sparse_series = pd.Series([1, 2, 3]).to_sparse() sparse_array = pd.SparseArray(np.random.randn(10)) + sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]}) def test_abc_types(self): assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) @@ -37,6 +38,7 @@ def test_abc_types(self): assert isinstance(self.df.to_panel(), gt.ABCPanel) assert isinstance(self.sparse_series, gt.ABCSparseSeries) assert isinstance(self.sparse_array, gt.ABCSparseArray) + assert isinstance(self.sparse_frame, gt.ABCSparseDataFrame) assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) From 81aba2ee596128d2a1605bf16b520f3433fc7266 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 31 Jan 2018 15:23:01 +0700 Subject: [PATCH 7/8] Cleanup linting problems --- pandas/core/dtypes/concat.py | 3 ++- pandas/core/dtypes/generic.py | 3 ++- pandas/tests/sparse/test_combine_concat.py | 28 ++++++++++++++-------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 4cd1e86d9a5b2..ea84e3641f425 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -98,7 +98,8 @@ def _get_frame_result_type(result, objs): if result.blocks and all(b.is_sparse for b in result.blocks): return SparseDataFrame else: - return next(obj for obj in objs if not isinstance(obj, 
ABCSparseDataFrame)) + return next(obj for obj in objs if not isinstance(obj, + ABCSparseDataFrame)) def _concat_compat(to_concat, axis=0): diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 794330cfe0281..b032cb6f14d4c 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -43,7 +43,8 @@ def _check(cls, inst): ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", )) ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", )) -ABCSparseDataFrame = create_pandas_abc_type("ABCSparseDataFrame", "_subtyp", ("sparse_frame", )) +ABCSparseDataFrame = create_pandas_abc_type("ABCSparseDataFrame", "_subtyp", + ("sparse_frame", )) ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",)) ABCSparseSeries = create_pandas_abc_type("ABCSparseSeries", "_subtyp", ('sparse_series', diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index 57f9b5f52de21..b82377948815f 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -4,9 +4,9 @@ import numpy as np import pandas as pd import pandas.util.testing as tm - import itertools + class TestSparseSeriesConcat(object): def test_concat(self): @@ -319,10 +319,14 @@ def test_concat_axis1(self): assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) - @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', itertools.product([None, 0, 1, np.nan], [0,1], [1,0])) + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', + itertools.product([None, 0, 1, np.nan], + [0, 1], + [1, 0])) def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): frames = [self.dense1, self.dense2] - sparse_frame = [frames[dense_idx], frames[sparse_idx].to_sparse(fill_value=fill_value)] + sparse_frame = [frames[dense_idx], + frames[sparse_idx].to_sparse(fill_value=fill_value)] dense_frame = [frames[dense_idx], 
frames[sparse_idx]] for _ in range(2): @@ -335,13 +339,17 @@ def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): sparse_frame = sparse_frame[::-1] dense_frame = dense_frame[::-1] - @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', itertools.product([None, 0, 1, np.nan], [0,1], [1,0])) - def test_concat_sparse_dense_columns(self, fill_value, sparse_idx, dense_idx): - dense_frames = [self.dense1, self.dense3] + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', + itertools.product([None, 0, 1, np.nan], + [0, 1], + [1, 0])) + def test_concat_sparse_dense_cols(self, fill_value, sparse_idx, dense_idx): + frames = [self.dense1, self.dense3] + + sparse_frame = [frames[dense_idx], + frames[sparse_idx].to_sparse(fill_value=fill_value)] + dense_frame = [frames[dense_idx], frames[sparse_idx]] - sparse_frame = [dense_frames[dense_idx], dense_frames[sparse_idx].to_sparse(fill_value=fill_value)] - dense_frame = [dense_frames[dense_idx], dense_frames[sparse_idx]] - for _ in range(2): res = pd.concat(sparse_frame, axis=1) exp = pd.concat(dense_frame, axis=1) @@ -354,7 +362,7 @@ def test_concat_sparse_dense_columns(self, fill_value, sparse_idx, dense_idx): assert not res[res.columns[0]].empty assert res.iloc[0, 0] == exp.iloc[0, 0] - for column in dense_frames[dense_idx].columns: + for column in frames[dense_idx].columns: if dense_idx == sparse_idx: tm.assert_frame_equal(res[column], exp[column]) else: From 0768990178f2a29fb167c720b33231d7f958f626 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Wed, 31 Jan 2018 18:26:24 +0700 Subject: [PATCH 8/8] Cleanup on tests from PR --- pandas/core/dtypes/concat.py | 3 +-- pandas/tests/sparse/test_combine_concat.py | 12 ++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ea84e3641f425..ddecbe85087d8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -93,9 +93,8 @@ def 
_get_frame_result_type(result, objs): otherwise, return 1st obj """ - from pandas.core.sparse.api import SparseDataFrame - if result.blocks and all(b.is_sparse for b in result.blocks): + from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame else: return next(obj for obj in objs if not isinstance(obj, diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index b82377948815f..70fd1da529d46 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -329,6 +329,7 @@ def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): frames[sparse_idx].to_sparse(fill_value=fill_value)] dense_frame = [frames[dense_idx], frames[sparse_idx]] + # This will try both directions sparse + dense and dense + sparse for _ in range(2): res = pd.concat(sparse_frame) exp = pd.concat(dense_frame) @@ -344,24 +345,19 @@ def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): [0, 1], [1, 0])) def test_concat_sparse_dense_cols(self, fill_value, sparse_idx, dense_idx): + # See GH16874, GH18914 and #18686 for why this should be a DataFrame + frames = [self.dense1, self.dense3] sparse_frame = [frames[dense_idx], frames[sparse_idx].to_sparse(fill_value=fill_value)] dense_frame = [frames[dense_idx], frames[sparse_idx]] + # This will try both directions sparse + dense and dense + sparse for _ in range(2): res = pd.concat(sparse_frame, axis=1) exp = pd.concat(dense_frame, axis=1) - # See GH18914 and #18686 for why this should be - # A DataFrame - assert type(res) is pd.DataFrame - # See GH16874 - assert not res.isnull().empty - assert not res[res.columns[0]].empty - assert res.iloc[0, 0] == exp.iloc[0, 0] - for column in frames[dense_idx].columns: if dense_idx == sparse_idx: tm.assert_frame_equal(res[column], exp[column])