Skip to content

Commit eeb7416

Browse files
author
Christoph Möhl
committed
BUG: GH15150 crosstab normalize with MultiIndex
1 parent 2e1f5d7 commit eeb7416

File tree

2 files changed

+36
-28
lines changed

2 files changed

+36
-28
lines changed

pandas/core/reshape/pivot.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -547,35 +547,46 @@ def _normalize(table, normalize, margins, margins_name='All'):
547547
raise ValueError("Not a valid normalize argument")
548548

549549
if margins is False:
550-
551550
# Actual Normalizations
552551
normalizers = {
553552
'all': lambda x: x / x.sum(axis=1).sum(axis=0),
554553
'columns': lambda x: x / x.sum(),
555554
'index': lambda x: x.div(x.sum(axis=1), axis=0)
556555
}
557-
556+
558557
elif margins is True:
559-
#skip margin rows and/or cols for normalization
558+
# skip margin rows and/or cols for normalization
560559
normalizers = {
561-
'all': lambda x: x / x.iloc[:-1,:-1].sum(axis=1).sum(axis=0),
562-
'columns': lambda x: x.div(x.iloc[:-1,:].sum()).iloc[:-1,:],
563-
'index': lambda x: (x.div(x.iloc[:,:-1].sum(axis=1), axis=0)).iloc[:,:-1]
560+
'all': lambda x: x / x.iloc[:-1, :-1].sum(axis=1).sum(axis=0),
561+
'columns': lambda x: x.div(x.iloc[:-1, :].sum()).iloc[:-1, :],
562+
'index': lambda x: (x.div(x.iloc[:, :-1].sum(axis=1),
563+
axis=0)).iloc[:, :-1]
564564
}
565565

566566
else:
567-
raise ValueError("Not a valid margins argument")
567+
raise ValueError("Not a valid margins argument")
568568

569569
normalizers[True] = normalizers['all']
570570

571571
try:
572572
f = normalizers[normalize]
573573
except KeyError:
574574
raise ValueError("Not a valid normalize argument")
575-
575+
576576
table = f(table)
577577
table = table.fillna(0)
578-
578+
579+
if margins is True:
580+
# reset index to ensure default index dtype
581+
if normalize == 'index':
582+
colnames = table.columns.names
583+
table.columns = Index(table.columns.tolist())
584+
table.columns.names = colnames
585+
if normalize == 'columns':
586+
rownames = table.index.names
587+
table.index = Index(table.index.tolist())
588+
table.index.names = rownames
589+
579590
return table
580591

581592

pandas/tests/reshape/test_pivot.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,21 +1300,18 @@ def test_crosstab_normalize(self):
13001300
[0.25, 0.75],
13011301
[0.4, 0.6]],
13021302
index=pd.Index([1, 2, 'All'],
1303-
name='a',
1304-
dtype='object'),
1303+
name='a'),
13051304
columns=pd.Index([3, 4], name='b'))
13061305
col_normal_margins = pd.DataFrame([[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
1307-
index=pd.Index([1, 2], name='a',
1308-
dtype='object'),
1306+
index=pd.Index([1, 2], name='a'),
13091307
columns=pd.Index([3, 4, 'All'],
13101308
name='b'))
13111309

13121310
all_normal_margins = pd.DataFrame([[0.2, 0, 0.2],
13131311
[0.2, 0.6, 0.8],
13141312
[0.4, 0.6, 1]],
13151313
index=pd.Index([1, 2, 'All'],
1316-
name='a',
1317-
dtype='object'),
1314+
name='a'),
13181315
columns=pd.Index([3, 4, 'All'],
13191316
name='b'))
13201317
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index',
@@ -1361,10 +1358,10 @@ def test_crosstab_normalize(self):
13611358

13621359
def test_crosstab_norm_margins_with_multiindex(self):
13631360
# GH 15150
1364-
a = np.array(['foo', 'bar', 'foo', 'bar','bar', 'foo'])
1365-
b = np.array(['one', 'one', 'two', 'one','two', 'two'])
1366-
c = np.array(['dull', 'shiny', 'dull', 'dull','dull', 'shiny'])
1367-
d = np.array(['a', 'a', 'b', 'a','b', 'b'])
1361+
a = np.array(['foo', 'bar', 'foo', 'bar', 'bar', 'foo'])
1362+
b = np.array(['one', 'one', 'two', 'one', 'two', 'two'])
1363+
c = np.array(['dull', 'shiny', 'dull', 'dull', 'dull', 'shiny'])
1364+
d = np.array(['a', 'a', 'b', 'a', 'b', 'b'])
13681365
expected_col_colnorm = MultiIndex(levels=[['All', 'dull', 'shiny'],
13691366
['', 'a', 'b']],
13701367
labels=[[1, 1, 2, 2, 0],
@@ -1399,11 +1396,11 @@ def test_crosstab_norm_margins_with_multiindex(self):
13991396
[.33333333, .33333333,
14001397
.16666667, .16666667]])
14011398
expected_indexnorm = pd.DataFrame(expected_data_indexnorm,
1402-
index=expected_index_indexnorm,
1403-
columns=expected_col_indexnorm)
1399+
index=expected_index_indexnorm,
1400+
columns=expected_col_indexnorm)
14041401
expected_data_allnorm = np.array([[0.16666667, 0., .16666667,
14051402
0., .33333333],
1406-
[0. ,.16666667, 0.,
1403+
[0., .16666667, 0.,
14071404
0., .16666667],
14081405
[.16666667, 0., 0.,
14091406
0., .16666667],
@@ -1412,15 +1409,15 @@ def test_crosstab_norm_margins_with_multiindex(self):
14121409
[0.33333333, .33333333, .16666667,
14131410
.16666667, 1.]])
14141411
expected_allnorm = pd.DataFrame(expected_data_allnorm,
1415-
index=expected_index_indexnorm,
1416-
columns=expected_col_colnorm)
1412+
index=expected_index_indexnorm,
1413+
columns=expected_col_colnorm)
14171414

1418-
result_colnorm = pd.crosstab([a, b], [c,d], normalize='columns',
1415+
result_colnorm = pd.crosstab([a, b], [c, d], normalize='columns',
14191416
margins=True)
1420-
result_indexnorm = pd.crosstab([a, b], [c,d], normalize='index',
1421-
margins=True)
1422-
result_allnorm = pd.crosstab([a, b], [c,d], normalize='all',
1417+
result_indexnorm = pd.crosstab([a, b], [c, d], normalize='index',
14231418
margins=True)
1419+
result_allnorm = pd.crosstab([a, b], [c, d], normalize='all',
1420+
margins=True)
14241421

14251422
tm.assert_frame_equal(result_colnorm, expected_colnorm)
14261423
tm.assert_frame_equal(result_indexnorm, expected_indexnorm)

0 commit comments

Comments
 (0)