From 9489cb273e7bbeddd2f20bbce72a50b1ed5bee33 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 11 Jan 2017 23:38:40 -0500 Subject: [PATCH 1/5] BUG: Fix issue #14848 groupby().describe() on indices containing all tuples --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ pandas/tools/merge.py | 6 +++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c82dc370e3e71..b9b9c6d7dfb90 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -351,6 +351,7 @@ Bug Fixes - Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`) - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) +- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b00dc62206f57..cd9c0418d528a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1490,6 +1490,19 @@ def test_frame_describe_multikey(self): for name, group in groupedT: assert_frame_equal(result[name], group.describe()) + # GH #14848 + def test_frame_describe_tupleindex(self): + df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3, + 'y': [10, 20, 30, 40, 50] * 3, + 'z': [100, 200, 300, 400, 500] * 3}) + df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={'k': 'key'}) + des1 = df1.groupby('k').describe() + des2 = df2.groupby('key').describe() + if len(des1) > 0: + des2.index.set_names(des1.index.names, inplace=True) + assert_frame_equal(des1, des2) + def test_frame_groupby(self): grouped = self.tsframe.groupby(lambda x: x.weekday()) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 4012629aa3c90..16c5d46e3b4b4 100644 --- a/pandas/tools/merge.py +++ 
b/pandas/tools/merge.py @@ -1626,7 +1626,11 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs name = getattr(keys, 'name', None) - keys = Index(clean_keys, name=name) + # GH 14848 + # Don't pass name when creating index (# GH 14252) + # So that if keys are tuples, name isn't checked + keys = Index(clean_keys) + keys.name = name if len(objs) == 0: raise ValueError('All objects passed were None') From f3a7a21f83ff14314a565e406fffc0a50b7d0f54 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 12 Jan 2017 11:59:03 -0500 Subject: [PATCH 2/5] Changes per jreback requests --- pandas/tests/groupby/test_groupby.py | 12 ++++++------ pandas/tools/merge.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index cd9c0418d528a..8e61fa3a5fb66 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1490,18 +1490,18 @@ def test_frame_describe_multikey(self): for name, group in groupedT: assert_frame_equal(result[name], group.describe()) - # GH #14848 def test_frame_describe_tupleindex(self): + + # GH 14848 - regression from 0.19.0 to 0.19.1 df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3, 'y': [10, 20, 30, 40, 50] * 3, 'z': [100, 200, 300, 400, 500] * 3}) df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 df2 = df1.rename(columns={'k': 'key'}) - des1 = df1.groupby('k').describe() - des2 = df2.groupby('key').describe() - if len(des1) > 0: - des2.index.set_names(des1.index.names, inplace=True) - assert_frame_equal(des1, des2) + result = df1.groupby('k').describe() + expected = df2.groupby('key').describe() + expected.index.set_names(result.index.names, inplace=True) + assert_frame_equal(result, expected) def test_frame_groupby(self): grouped = self.tsframe.groupby(lambda x: x.weekday()) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 16c5d46e3b4b4..eb0a263d43d29 100644 --- 
a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1627,10 +1627,10 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, objs = clean_objs name = getattr(keys, 'name', None) # GH 14848 - # Don't pass name when creating index (# GH 14252) - # So that if keys are tuples, name isn't checked - keys = Index(clean_keys) - keys.name = name + # If you already have an Index, no need + # to recreate it + if not isinstance(keys, Index): + keys = Index(clean_keys, name=name) if len(objs) == 0: raise ValueError('All objects passed were None') From fbd20f5218e25a30177c90afe1693314a88cec7d Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 12 Jan 2017 15:22:09 -0500 Subject: [PATCH 3/5] Raise error when creating index of tuples with name parameter a string --- pandas/indexes/multi.py | 6 ++++++ pandas/tests/indexes/test_multi.py | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 132543e0e386c..776f190989220 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -20,6 +20,7 @@ is_object_dtype, is_iterator, is_list_like, + is_string_like, is_scalar) from pandas.types.missing import isnull, array_equivalent from pandas.core.common import (_values_from_object, @@ -490,6 +491,11 @@ def _set_names(self, names, level=None, validate=True): that it only acts on copies """ + # GH 15110 + # Don't allow a single string for names in a MultiIndex + if names is not None and is_string_like(names): + raise ValueError('Names should not be a single string for a ' + 'MultiIndex.') names = list(names) if validate and level is not None and len(names) != len(level): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 16831219e0930..2861a1f56b24b 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2554,3 +2554,12 @@ def test_unsortedindex(self): with assertRaises(KeyError): df.loc(axis=0)['q', :] + + def 
test_tuples_with_name_string(self): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + with assertRaises(ValueError): + pd.Index(li, name='abc') + with assertRaises(ValueError): + pd.Index(li, name='a') From db13c3bfe2402a69e0f74fc8ba89410b5aecfc07 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 12 Jan 2017 18:25:10 -0500 Subject: [PATCH 4/5] Use not is_list_like --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/indexes/multi.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index b9b9c6d7dfb90..d09b7c0870870 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -351,7 +351,7 @@ Bug Fixes - Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`) - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) -- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) +- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples. 
Raise `ValueError` if creating an `Index` with tuples and not passing a list of names (:issue:`14848`) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 776f190989220..2afafaeb544d1 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -20,7 +20,6 @@ is_object_dtype, is_iterator, is_list_like, - is_string_like, is_scalar) from pandas.types.missing import isnull, array_equivalent from pandas.core.common import (_values_from_object, @@ -493,9 +492,8 @@ def _set_names(self, names, level=None, validate=True): # GH 15110 # Don't allow a single string for names in a MultiIndex - if names is not None and is_string_like(names): - raise ValueError('Names should not be a single string for a ' - 'MultiIndex.') + if names is not None and not is_list_like(names): + raise ValueError('Names should be list-like for a MultiIndex') names = list(names) if validate and level is not None and len(names) != len(level): From c18c6cbc01e8b2a0fc564efdebc2a203e2201f32 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Fri, 13 Jan 2017 11:36:14 -0500 Subject: [PATCH 5/5] Undo change to merge.py and make whatsnew a 2 line comment. --- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/tools/merge.py | 6 +----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d09b7c0870870..3f72259c0838c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -351,7 +351,8 @@ Bug Fixes - Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`) - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) -- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples. 
Raise `ValueError` if creating an `Index` with tuples and not passing a list of names (:issue:`14848`) +- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) +- Raise ``ValueError`` if creating a ``MultiIndex`` with tuples and not passing a list of names (:issue:`15110`) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index eb0a263d43d29..4012629aa3c90 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1626,11 +1626,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs name = getattr(keys, 'name', None) - # GH 14848 - # If you already have an Index, no need - # to recreate it - if not isinstance(keys, Index): - keys = Index(clean_keys, name=name) + keys = Index(clean_keys, name=name) if len(objs) == 0: raise ValueError('All objects passed were None')