From 8d75c557a2eba63810baba08e562b1963bf5ee85 Mon Sep 17 00:00:00 2001 From: OXPHOS Date: Mon, 2 Jan 2017 19:20:36 -0500 Subject: [PATCH 1/4] fix dropna=false tests --- pandas/tools/tests/test_pivot.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index e63cfcc8c0590..9e116264dc167 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1072,17 +1072,17 @@ def test_margin_dropna(self): df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan], 'b': [3, 3, 4, 4, 4, 4]}) actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) - expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) - expected.index = Index([1.0, 2.0, 'All'], name='a') - expected.columns = Index([3, 4, 'All'], name='b') + expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [0, 1, 1], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, np.nan, 'All'], name='a') + expected.columns = Index([3.0, 4.0, 'All'], name='b') tm.assert_frame_equal(actual, expected) df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan], 'b': [3, np.nan, 4, 4, 4, 4]}) actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) - expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) - expected.index = Index([1.0, 2.0, 'All'], name='a') - expected.columns = Index([3.0, 4.0, 'All'], name='b') + expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4], [1, 4, 1, 6]]) + expected.index = Index([1.0, 2.0, np.nan, 'All'], name='a') + expected.columns = Index([3.0, 4.0, np.nan, 'All'], name='b') tm.assert_frame_equal(actual, expected) a = np.array(['foo', 'foo', 'foo', 'bar', @@ -1094,20 +1094,20 @@ def test_margin_dropna(self): actual = pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'], margins=True, dropna=False) - m = MultiIndex.from_arrays([['one', 'one', 'two', 'two', 'All'], - ['dull', 'shiny', 'dull', 'shiny', '']], + m = MultiIndex.from_arrays([[np.nan, np.nan, 'one', 'one', 'two', 'two', 'All'], + ['dull', 'shiny', 'dull', 'shiny', 'dull', 'shiny', '']], names=['b', 'c']) - expected = DataFrame([[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], - [3, 0, 2, 1, 7]], columns=m) + expected = DataFrame([[0, 0, 1, 0, 1, 0, 2], [0, 1, 2, 0, 1, 1, 5], + [0, 1, 3, 0, 2, 1, 7]], columns=m) expected.index = Index(['bar', 'foo', 'All'], name='a') tm.assert_frame_equal(actual, expected) actual = pd.crosstab([a, b], c, rownames=['a', 'b'], colnames=['c'], margins=True, dropna=False) - m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'], - ['one', 'two', 'one', 'two', '']], + m = MultiIndex.from_arrays([['bar', 'bar', 'bar', 'foo', 'foo', 'foo', 'All'], + [np.nan, 'one', 'two', np.nan, 'one', 'two', '']], names=['a', 'b']) - expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], + expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m) expected.columns = Index(['dull', 'shiny', 'All'], name='c') tm.assert_frame_equal(actual, expected) From 0f38f43bd135184f4fb22a060d4b33da886ac1e5 Mon Sep 17 00:00:00 2001 From: OXPHOS Date: Mon, 2 Jan 2017 19:33:58 -0500 Subject: [PATCH 2/4] fix MultiIndex initiation from np.nan --- pandas/tools/tests/test_pivot.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 9e116264dc167..bfa450888616d 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1094,9 +1094,10 @@ def test_margin_dropna(self): actual = pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'], margins=True, dropna=False) - m = MultiIndex.from_arrays([[np.nan, np.nan, 'one', 'one', 'two', 'two', 'All'], - ['dull', 'shiny', 'dull', 'shiny', 'dull', 'shiny', '']], - names=['b', 'c']) + + m = MultiIndex(levels = [Index(['All', np.nan, 'one', 'two']), + Index(['', 'dull', 'shiny'])], labels = [[1, 1, 2, 2, 3, 3, 0], + [1, 2, 1, 2, 1, 2, 0]], names=['b', 'c']) expected = DataFrame([[0, 0, 1, 0, 1, 0, 2], [0, 1, 2, 0, 1, 1, 5], [0, 1, 3, 0, 2, 1, 7]], columns=m) expected.index = Index(['bar', 'foo', 'All'], name='a') @@ -1104,9 +1105,11 @@ def test_margin_dropna(self): actual = pd.crosstab([a, b], c, rownames=['a', 'b'], colnames=['c'], margins=True, dropna=False) - m = MultiIndex.from_arrays([['bar', 'bar', 'bar', 'foo', 'foo', 'foo', 'All'], - [np.nan, 'one', 'two', np.nan, 'one', 'two', '']], - names=['a', 'b']) + + print actual.index + m = MultiIndex(levels=[['All', 'bar', 'foo'], ['', np.nan, 'one', 'two']], + labels=[[1, 1, 1, 2, 2, 2, 0], [1, 2, 3, 1, 2, 3, 0]], + names=['a', 'b']) expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m) expected.columns = Index(['dull', 'shiny', 'All'], name='c') From 2e3f8e0aff1845459a6b2d0995cf318939f475b2 Mon Sep 17 00:00:00 2001 From: OXPHOS Date: Mon, 2 Jan 2017 21:40:23 -0500 Subject: [PATCH 3/4] fix indexes dropna=false --- pandas/tools/pivot.py | 36 +++++++++++++++++++++++++++++-- pandas/tools/tests/test_pivot.py | 37 +++++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 0f56b0b076897..4995e4569ed28 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -9,6 +9,7 @@ from pandas.tools.util import cartesian_product from pandas.compat import range, lrange, zip from pandas import compat +from pandas import isnull import pandas.core.common as com import numpy as np @@ -81,9 +82,21 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', DataFrame.pivot : pivot without aggregation that can handle non-numeric data """ + pd_null = "_null_pd" + index = _convert_by(index) columns = _convert_by(columns) + keys = index + columns + + if not dropna: + key_data = np.array(data[keys], dtype='object') + _data_null_idx = isnull(key_data) + _data_null_val = key_data[_data_null_idx] + key_data[_data_null_idx] = pd_null + for idx, k in enumerate(keys): + data[k] = key_data[:, idx] + if isinstance(aggfunc, list): pieces = [] keys = [] @@ -96,8 +109,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', keys.append(func.__name__) return concat(pieces, keys=keys, axis=1) - keys = index + columns - values_passed = values is not None if values_passed: if is_list_like(values): @@ -180,6 +191,27 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if len(index) == 0 and len(columns) > 0: table = table.T + if not dropna: + if _data_null_val.size > 0: + def _convert_null_vals(indexes): + if isinstance(indexes, MultiIndex): + _new_level = [] + for _tmp_index in indexes.levels: + tmp = np.array(_tmp_index) + tmp[tmp == pd_null] = _data_null_val[0] + _new_level.append(Index(tmp, name=_tmp_index.name)) + indexes = MultiIndex(levels=_new_level, + labels=indexes.labels, + names=indexes.names) + else: + tmp = np.array(indexes) + tmp[tmp == pd_null] = _data_null_val[0] + indexes = Index(tmp, name=indexes.name) + return indexes + + table.columns = _convert_null_vals(table.columns) + table.index = _convert_null_vals(table.index) + return table diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index bfa450888616d..3afec69e26b84 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -87,6 +87,26 @@ def test_pivot_table_dropna(self): tm.assert_index_equal(pv_col.columns, m) tm.assert_index_equal(pv_ind.index, m) + df = DataFrame([[1, 'a', 'A'], [1, 'b', 'B'], [1, 'c', None]], + columns=['x', 'y', 'z']) + actual = df.pivot_table(values='x', index='y', columns='z', + aggfunc='sum', fill_value=0, margins=True, + dropna=True) + expected = pd.DataFrame([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], + [1.0, 1.0, 2.0]]) + expected.index = Index(['a', 'b', 'All'], name='y') + expected.columns = Index(['A', 'B', 'All'], name='z') + tm.assert_frame_equal(actual, expected) + + actual = df.pivot_table(values='x', index='y', columns='z', + aggfunc='sum', fill_value=0, margins=True, + dropna=False) + expected = pd.DataFrame([[1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 0.0, 1.0], + [0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 1.0, 3.0]]) + expected.index = Index(['a', 'b', 'c', 'All'], name='y') + expected.columns = Index(['A', 'B', None, 'All'], name='z') + tm.assert_frame_equal(actual, expected) + def test_pass_array(self): result = self.data.pivot_table( 'D', index=self.data.A, columns=self.data.C) @@ -1080,7 +1100,8 @@ def test_margin_dropna(self): df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan], 'b': [3, np.nan, 4, 4, 4, 4]}) actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) - expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4], [1, 4, 1, 6]]) + expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4], + [1, 4, 1, 6]]) expected.index = Index([1.0, 2.0, np.nan, 'All'], name='a') expected.columns = Index([3.0, 4.0, np.nan, 'All'], name='b') tm.assert_frame_equal(actual, expected) @@ -1095,8 +1116,9 @@ def test_margin_dropna(self): actual = pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'], margins=True, dropna=False) - m = MultiIndex(levels = [Index(['All', np.nan, 'one', 'two']), - Index(['', 'dull', 'shiny'])], labels = [[1, 1, 2, 2, 3, 3, 0], + m = MultiIndex(levels=[Index(['All', np.nan, 'one', 'two']), + Index(['', 'dull', 'shiny'])], + labels=[[1, 1, 2, 2, 3, 3, 0], [1, 2, 1, 2, 1, 2, 0]], names=['b', 'c']) expected = DataFrame([[0, 0, 1, 0, 1, 0, 2], [0, 1, 2, 0, 1, 1, 5], [0, 1, 3, 0, 2, 1, 7]], columns=m) @@ -1105,13 +1127,12 @@ def test_margin_dropna(self): actual = pd.crosstab([a, b], c, rownames=['a', 'b'], colnames=['c'], margins=True, dropna=False) - - print actual.index - m = MultiIndex(levels=[['All', 'bar', 'foo'], ['', np.nan, 'one', 'two']], + m = MultiIndex(levels=[['All', 'bar', 'foo'], + ['', np.nan, 'one', 'two']], labels=[[1, 1, 1, 2, 2, 2, 0], [1, 2, 3, 1, 2, 3, 0]], names=['a', 'b']) - expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1], [2, 0, 2], [1, 1, 2], - [5, 2, 7]], index=m) + expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1], + [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m) expected.columns = Index(['dull', 'shiny', 'All'], name='c') tm.assert_frame_equal(actual, expected) From c27d3d369079d8d31927a42b9a8662ff620d6f08 Mon Sep 17 00:00:00 2001 From: OXPHOS Date: Tue, 3 Jan 2017 00:25:24 -0500 Subject: [PATCH 4/4] fix style --- pandas/tools/tests/test_pivot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 3afec69e26b84..06896835954e1 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1117,9 +1117,9 @@ def test_margin_dropna(self): colnames=['b', 'c'], margins=True, dropna=False) m = MultiIndex(levels=[Index(['All', np.nan, 'one', 'two']), - Index(['', 'dull', 'shiny'])], + Index(['', 'dull', 'shiny'])], labels=[[1, 1, 2, 2, 3, 3, 0], - [1, 2, 1, 2, 1, 2, 0]], names=['b', 'c']) + [1, 2, 1, 2, 1, 2, 0]], names=['b', 'c']) expected = DataFrame([[0, 0, 1, 0, 1, 0, 2], [0, 1, 2, 0, 1, 1, 5], [0, 1, 3, 0, 2, 1, 7]], columns=m) expected.index = Index(['bar', 'foo', 'All'], name='a') @@ -1129,8 +1129,8 @@ def test_margin_dropna(self): colnames=['c'], margins=True, dropna=False) m = MultiIndex(levels=[['All', 'bar', 'foo'], ['', np.nan, 'one', 'two']], - labels=[[1, 1, 1, 2, 2, 2, 0], [1, 2, 3, 1, 2, 3, 0]], - names=['a', 'b']) + labels=[[1, 1, 1, 2, 2, 2, 0], [1, 2, 3, 1, 2, 3, 0]], + names=['a', 'b']) expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m) expected.columns = Index(['dull', 'shiny', 'All'], name='c')