Skip to content

Commit 9c55b4d

Browse files
author
Christoph Möhl
committed
added whatsnew and reformatted tests to be more readable
1 parent 66ef8df commit 9c55b4d

File tree

3 files changed

+77
-61
lines changed

3 files changed

+77
-61
lines changed

doc/source/whatsnew/v0.21.0.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,9 @@ Reshaping
311311
- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`)
312312
- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`)
313313
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
314+
- Bug in ``pd.crosstab(normalize=True, margins=True)`` where at least one axis has a ``MultiIndex`` (:issue:`15150`)
315+
316+

314317

315318
Numeric
316319
^^^^^^^

pandas/core/reshape/pivot.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
532532
if values is None and margins:
533533
table = table.fillna(0).astype(np.int64)
534534

535-
536535
if margins:
537536
_check_margins_name(margins_name, table)
538537

pandas/tests/reshape/test_pivot.py

Lines changed: 74 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,66 +1362,80 @@ def test_crosstab_norm_margins_with_multiindex(self):
13621362
b = np.array(['one', 'one', 'two', 'one', 'two', 'two'])
13631363
c = np.array(['dull', 'shiny', 'dull', 'dull', 'dull', 'shiny'])
13641364
d = np.array(['a', 'a', 'b', 'a', 'b', 'b'])
1365-
expected_col_colnorm = MultiIndex(levels=[['All', 'dull', 'shiny'],
1366-
['', 'a', 'b']],
1367-
labels=[[1, 1, 2, 2, 0],
1368-
[1, 2, 1, 2, 0]],
1369-
names=['col_0', 'col_1'])
1370-
expected_index_colnorm = MultiIndex(levels=[['All', 'bar', 'foo'],
1371-
['', 'one', 'two']],
1372-
labels=[[1, 1, 2, 2],
1373-
[1, 2, 1, 2]],
1374-
names=['row_0', 'row_1'])
1375-
expected_data_colnorm = np.array([[.5, 0., 1., 0., .333333],
1376-
[0., .5, 0., 0., .166667],
1377-
[.5, 0., 0., 0., .166667],
1378-
[0., .5, 0., 1., .333333]])
1379-
expected_colnorm = pd.DataFrame(expected_data_colnorm,
1380-
index=expected_index_colnorm,
1381-
columns=expected_col_colnorm)
1382-
expected_col_indexnorm = MultiIndex(levels=[['All', 'dull', 'shiny'],
1383-
['', 'a', 'b']],
1384-
labels=[[1, 1, 2, 2],
1385-
[1, 2, 1, 2]],
1386-
names=['col_0', 'col_1'])
1387-
expected_index_indexnorm = MultiIndex(levels=[['All', 'bar', 'foo'],
1388-
['', 'one', 'two']],
1389-
labels=[[1, 1, 2, 2, 0],
1390-
[1, 2, 1, 2, 0]],
1391-
names=['row_0', 'row_1'])
1392-
expected_data_indexnorm = np.array([[.5, 0., .5, 0.],
1393-
[0., 1., 0., 0.],
1394-
[1., 0., 0., 0.],
1395-
[0., .5, 0., .5],
1396-
[.33333333, .33333333,
1397-
.16666667, .16666667]])
1398-
expected_indexnorm = pd.DataFrame(expected_data_indexnorm,
1399-
index=expected_index_indexnorm,
1400-
columns=expected_col_indexnorm)
1401-
expected_data_allnorm = np.array([[0.16666667, 0., .16666667,
1402-
0., .33333333],
1403-
[0., .16666667, 0.,
1404-
0., .16666667],
1405-
[.16666667, 0., 0.,
1406-
0., .16666667],
1407-
[0., .16666667, 0.,
1408-
.16666667, .33333333],
1409-
[0.33333333, .33333333, .16666667,
1410-
.16666667, 1.]])
1411-
expected_allnorm = pd.DataFrame(expected_data_allnorm,
1412-
index=expected_index_indexnorm,
1413-
columns=expected_col_colnorm)
1414-
1415-
result_colnorm = pd.crosstab([a, b], [c, d], normalize='columns',
1416-
margins=True)
1417-
result_indexnorm = pd.crosstab([a, b], [c, d], normalize='index',
1418-
margins=True)
1419-
result_allnorm = pd.crosstab([a, b], [c, d], normalize='all',
1420-
margins=True)
1421-
1422-
tm.assert_frame_equal(result_colnorm, expected_colnorm)
1423-
tm.assert_frame_equal(result_indexnorm, expected_indexnorm)
1424-
tm.assert_frame_equal(result_allnorm, expected_allnorm)
1365+
1366+
# test for normalize == 'columns'
1367+
expected_columns = MultiIndex(levels=[['All', 'dull', 'shiny'],
1368+
['', 'a', 'b']],
1369+
labels=[[1, 1, 2, 2, 0],
1370+
[1, 2, 1, 2, 0]],
1371+
names=['col_0', 'col_1'])
1372+
expected_index = MultiIndex(levels=[['All', 'bar', 'foo'],
1373+
['', 'one', 'two']],
1374+
labels=[[1, 1, 2, 2],
1375+
[1, 2, 1, 2]],
1376+
names=['row_0', 'row_1'])
1377+
expected_data = np.array([[.5, 0., 1., 0., .333333],
1378+
[0., .5, 0., 0., .166667],
1379+
[.5, 0., 0., 0., .166667],
1380+
[0., .5, 0., 1., .333333]])
1381+
expected = pd.DataFrame(expected_data,
1382+
index=expected_index,
1383+
columns=expected_columns)
1384+
result = pd.crosstab([a, b], [c, d], normalize='columns',
1385+
margins=True)
1386+
tm.assert_frame_equal(result, expected)
1387+
1388+
# test for normalize == 'index'
1389+
expected_columns = MultiIndex(levels=[['All', 'dull', 'shiny'],
1390+
['', 'a', 'b']],
1391+
labels=[[1, 1, 2, 2],
1392+
[1, 2, 1, 2]],
1393+
names=['col_0', 'col_1'])
1394+
expected_index = MultiIndex(levels=[['All', 'bar', 'foo'],
1395+
['', 'one', 'two']],
1396+
labels=[[1, 1, 2, 2, 0],
1397+
[1, 2, 1, 2, 0]],
1398+
names=['row_0', 'row_1'])
1399+
expected_data = np.array([[.5, 0., .5, 0.],
1400+
[0., 1., 0., 0.],
1401+
[1., 0., 0., 0.],
1402+
[0., .5, 0., .5],
1403+
[.33333333, .33333333,
1404+
.16666667, .16666667]])
1405+
expected = pd.DataFrame(expected_data,
1406+
index=expected_index,
1407+
columns=expected_columns)
1408+
result = pd.crosstab([a, b], [c, d], normalize='index',
1409+
margins=True)
1410+
tm.assert_frame_equal(result, expected)
1411+
1412+
# test for normalize == 'all'
1413+
expected_columns = MultiIndex(levels=[['All', 'dull', 'shiny'],
1414+
['', 'a', 'b']],
1415+
labels=[[1, 1, 2, 2, 0],
1416+
[1, 2, 1, 2, 0]],
1417+
names=['col_0', 'col_1'])
1418+
expected_index = MultiIndex(levels=[['All', 'bar', 'foo'],
1419+
['', 'one', 'two']],
1420+
labels=[[1, 1, 2, 2, 0],
1421+
[1, 2, 1, 2, 0]],
1422+
names=['row_0', 'row_1'])
1423+
expected_data = np.array([[0.16666667, 0., .16666667,
1424+
0., .33333333],
1425+
[0., .16666667, 0.,
1426+
0., .16666667],
1427+
[.16666667, 0., 0.,
1428+
0., .16666667],
1429+
[0., .16666667, 0.,
1430+
.16666667, .33333333],
1431+
[0.33333333, .33333333, .16666667,
1432+
.16666667, 1.]])
1433+
expected = pd.DataFrame(expected_data,
1434+
index=expected_index,
1435+
columns=expected_columns)
1436+
result = pd.crosstab([a, b], [c, d], normalize='all',
1437+
margins=True)
1438+
tm.assert_frame_equal(result, expected)
14251439

14261440
def test_crosstab_with_empties(self):
14271441
# Check handling of empties

0 commit comments

Comments
 (0)