diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 02ef2bbed19b6..14e185b5b2a26 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -103,4 +103,4 @@ Bug Fixes - Bug that caused segfault when resampling an empty Series (:issue:`10228`) - Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. (:issue:`10150`) - +- Bug in ``pandas.concat`` with ``axis=0`` when a column is of dtype ``category`` (:issue:`10177`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4c4d940f8077c..42d7163e7f741 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4133,7 +4133,7 @@ def get_empty_dtype_and_na(join_units): else: return np.dtype(np.bool_), None elif 'category' in upcast_classes: - return com.CategoricalDtype(), np.nan + return np.dtype(np.object_), np.nan elif 'float' in upcast_classes: return np.dtype(np.float64), np.nan elif 'datetime' in upcast_classes: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index beff41fd9d109..63b913f59f18a 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -2967,6 +2967,24 @@ def test_pickle_v0_15_2(self): # self.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + def test_concat_categorical(self): + # See GH 10177 + df1 = pd.DataFrame(np.arange(18).reshape(6, 3), columns=["a", "b", "c"]) + + df2 = pd.DataFrame(np.arange(14).reshape(7, 2), columns=["a", "c"]) + df2['h'] = pd.Series(pd.Categorical(["one", "one", "two", "one", "two", "two", "one"])) + + df_concat = pd.concat((df1, df2), axis=0).reset_index(drop=True) + + df_expected = pd.DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], + 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13]}) + df_expected['h'] = pd.Series(pd.Categorical([None, None, None, 
None, None, None, + "one", "one", "two", "one", "two", "two", "one"])) + + tm.assert_frame_equal(df_expected, df_concat) + + if __name__ == '__main__': import nose