Skip to content

Commit d14873d

Browse files
committed
BUG: GH11847 Unstack with mixed dtypes coerces everything to object
Changed the way in which the original data frame is copied (dropped use of .values, since it does not preserve dtypes).
1 parent 837db72 commit d14873d

File tree

3 files changed

+45
-2
lines changed

3 files changed

+45
-2
lines changed

doc/source/whatsnew/v0.19.2.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,5 @@ Bug Fixes
6868

6969

7070
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
71+
72+
- Bug in ``unstack()`` where, when called with a list of column(s) as an argument, all columns were coerced to ``object`` regardless of their original dtypes (:issue:`11847`)

pandas/core/reshape.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
277277
verify_integrity=False)
278278

279279
if isinstance(data, Series):
280-
dummy = Series(data.values, index=dummy_index)
280+
dummy = data.copy()
281+
dummy.index = dummy_index
281282
unstacked = dummy.unstack('__placeholder__')
282283
new_levels = clevels
283284
new_names = cnames
@@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
292293

293294
return result
294295

295-
dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
296+
dummy = data.copy()
297+
dummy.index = dummy_index
296298

297299
unstacked = dummy.unstack('__placeholder__')
298300
if isinstance(unstacked, Series):

pandas/tests/frame/test_reshape.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,45 @@ def test_unstack_fill_frame_categorical(self):
282282
index=list('xyz'))
283283
assert_frame_equal(result, expected)
284284

285+
def test_unstack_preserve_dtypes(self):
286+
# Checks fix for #11847
287+
df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
288+
index=['a', 'b', 'c'],
289+
some_categories=pd.Series(['a', 'b', 'c']).astype('category'),
290+
A=np.random.rand(3),
291+
B=1,
292+
C='foo',
293+
D=pd.Timestamp('20010102'),
294+
E=pd.Series([1.0, 50.0, 100.0]
295+
).astype('float32'),
296+
F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
297+
G=False,
298+
H=pd.Series([1, 200, 923442], dtype='int8')))
299+
300+
def unstack_and_compare(df, column_name):
301+
unstacked1 = df.unstack([column_name])
302+
unstacked2 = df.unstack(column_name)
303+
assert_frame_equal(unstacked1, unstacked2)
304+
305+
df1 = df.set_index(['state', 'index'])
306+
unstack_and_compare(df1, 'index')
307+
308+
df1 = df.set_index(['state', 'some_categories'])
309+
unstack_and_compare(df1, 'some_categories')
310+
311+
df1 = df.set_index(['F', 'C'])
312+
unstack_and_compare(df1, 'F')
313+
314+
df1 = df.set_index(['G', 'B', 'state'])
315+
unstack_and_compare(df1, 'B')
316+
317+
df1 = df.set_index(['E', 'A'])
318+
unstack_and_compare(df1, 'E')
319+
320+
df1 = df.set_index(['state', 'index'])
321+
s = df1['A']
322+
unstack_and_compare(s, 'index')
323+
285324
def test_stack_ints(self):
286325
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
287326
repeat=3)))

0 commit comments

Comments
 (0)