From ef6db687ac76f1a4dbece5fa8021f2fedc4a7cb5 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 23 Sep 2016 15:33:40 -0400 Subject: [PATCH 01/15] BUG: Propagate key names in concat. Fixes a bug where `pd.concat` didn't propagate the names of keys used to create a hierarchical index. --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/tests/frame/test_combine_concat.py | 25 ++++++++++++++++------- pandas/tools/merge.py | 14 +++++++++---- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 60847469aa02c..0d146afa5ee1f 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1563,3 +1563,4 @@ Bug Fixes - ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) +- Bug in ``concat`` where names of keys were not propagated to the resulting MultiIndex (:issue:`14252`) \ No newline at end of file diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index e5aaba26135e7..aaa12b3d83111 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -4,21 +4,24 @@ from datetime import datetime -from numpy import nan import numpy as np +from numpy import nan -from pandas.compat import lrange -from pandas import DataFrame, Series, Index, Timestamp import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas import DataFrame, Index, Series, Timestamp +from pandas.compat import lrange -import pandas.util.testing as tm +from pandas.core.base import FrozenList from pandas.tests.frame.common import TestData +import pandas.util.testing as tm +from pandas.util.testing import (assertRaisesRegexp, + assert_equal, + assert_frame_equal, + assert_series_equal) + class TestDataFrameConcatCommon(tm.TestCase, TestData): @@ -324,6 +327,14 @@ def test_join_multiindex_leftright(self): assert_frame_equal(df2.join(df1, how='left'), exp[['value2', 'value1']]) + def test_concat_named_keys(self): + # GH 14252 + df = DataFrame({'foo': [1, 2, 3, 4], + 'bar': [0.1, 0.2, 0.3, 0.4]}) + index = Index(['a', 'b'], name='baz') + concatted = pd.concat([df, df], keys=index) + assert_equal(concatted.index.names, FrozenList(['baz', None])) + class TestDataFrameCombineFirst(tm.TestCase, TestData): diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 8cdde8d92b28f..44199a53ed6af 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1361,15 +1361,18 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, objs = [obj for obj in objs if obj is not None] else: # #1649 - clean_keys = [] + clean_keys_list = [] clean_objs = [] for k, v in zip(keys, objs): if v is None: continue - clean_keys.append(k) + clean_keys_list.append(k) clean_objs.append(v) objs = clean_objs - keys = clean_keys + clean_keys_index = Index(clean_keys_list) + if isinstance(keys, Index): + clean_keys_index.name = keys.name + keys = clean_keys_index if len(objs) == 0: raise ValueError('All objects passed were None') @@ -1454,7 +1457,10 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, self.axis = axis self.join_axes = join_axes self.keys = keys - self.names = names + if hasattr(keys, 'names'): + self.names = names or keys.names + else: + self.names = names self.levels = levels self.ignore_index = ignore_index From bc5f1fb21c19a6e90f592b7aaced454b9483eed7 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 23 Sep 2016 15:39:21 -0400 Subject: [PATCH 02/15] cleanup --- pandas/tools/merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 44199a53ed6af..b8c391d5583c8 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1457,8 +1457,8 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, self.axis = axis self.join_axes = join_axes self.keys = keys - if hasattr(keys, 'names'): - self.names = names or keys.names + if names is None and hasattr(keys, 'names'): + self.names = keys.names else: self.names = names self.levels = levels From 5cd8392f43ae54ca88724a524cb2f354c91bb4c2 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 23 Sep 2016 15:43:52 -0400 Subject: [PATCH 03/15] added test for names --- pandas/tests/frame/test_combine_concat.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index aaa12b3d83111..e9f42a9eac930 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -332,8 +332,11 @@ def test_concat_named_keys(self): df = DataFrame({'foo': [1, 2, 3, 4], 'bar': [0.1, 0.2, 0.3, 0.4]}) index = Index(['a', 'b'], name='baz') - concatted = pd.concat([df, df], keys=index) - assert_equal(concatted.index.names, FrozenList(['baz', None])) + concatted_named_from_keys = pd.concat([df, df], keys=index) + assert_equal(concatted_named_from_keys.index.names, FrozenList(['baz', None])) + index_no_name = ['a', 'b'] + concatted_named_from_names = pd.concat([df, df], keys=index_no_name, names=['baz']) + assert_equal(concatted_named_from_names.index.names, FrozenList(['baz', None])) class TestDataFrameCombineFirst(tm.TestCase, TestData): From dd3c4cc656c2def58a0d4d0714296a81f0b7a244 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 23 Sep 2016 16:00:19 -0400 Subject: [PATCH 04/15] comments --- pandas/tests/frame/test_combine_concat.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index e9f42a9eac930..1670fba5f14b6 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -12,13 +12,10 @@ from pandas import DataFrame, Index, Series, Timestamp from pandas.compat import lrange -from pandas.core.base import FrozenList - from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import (assertRaisesRegexp, - assert_equal, assert_frame_equal, assert_series_equal) @@ -329,14 +326,18 @@ def test_join_multiindex_leftright(self): def test_concat_named_keys(self): # GH 14252 - df = DataFrame({'foo': [1, 2, 3, 4], - 'bar': [0.1, 0.2, 0.3, 0.4]}) + df = pd.DataFrame({'foo': [1, 2], 'bar': [0.1, 0.2]}) + df_concatted = pd.DataFrame( + {'foo': [1, 2, 1, 2], 'bar': [0.1, 0.2, 0.1, 0.2]}, + index=pd.MultiIndex.from_product((['a', 'b'], [0, 1]), + names=['baz', None])) index = Index(['a', 'b'], name='baz') concatted_named_from_keys = pd.concat([df, df], keys=index) - assert_equal(concatted_named_from_keys.index.names, FrozenList(['baz', None])) + assert_frame_equal(concatted_named_from_keys, df_concatted) index_no_name = ['a', 'b'] - concatted_named_from_names = pd.concat([df, df], keys=index_no_name, names=['baz']) - assert_equal(concatted_named_from_names.index.names, FrozenList(['baz', None])) + concatted_named_from_names = pd.concat( + [df, df], keys=index_no_name, names=['baz']) + assert_frame_equal(concatted_named_from_names, df_concatted) class TestDataFrameCombineFirst(tm.TestCase, TestData): From 5c0108b3c1adfe898b27614692d84560e456812a Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 23 Sep 2016 16:01:16 -0400 Subject: [PATCH 05/15] comments --- pandas/tools/merge.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index b8c391d5583c8..ec75f06f5f1c1 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1457,10 +1457,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, self.axis = axis self.join_axes = join_axes self.keys = keys - if names is None and hasattr(keys, 'names'): - self.names = keys.names - else: - self.names = names + self.names = names or getattr(keys, 'names', None) self.levels = levels self.ignore_index = ignore_index From 9615a69e50453cbf5d89fce943cd981d69cb7da0 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Sat, 24 Sep 2016 22:51:07 -0400 Subject: [PATCH 06/15] extra tests --- pandas/tests/frame/test_combine_concat.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 1670fba5f14b6..b7cd8a1c01224 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -327,17 +327,25 @@ def test_join_multiindex_leftright(self): def test_concat_named_keys(self): # GH 14252 df = pd.DataFrame({'foo': [1, 2], 'bar': [0.1, 0.2]}) - df_concatted = pd.DataFrame( + index = Index(['a', 'b'], name='baz') + concatted_named_from_keys = pd.concat([df, df], keys=index) + expected_named = pd.DataFrame( {'foo': [1, 2, 1, 2], 'bar': [0.1, 0.2, 0.1, 0.2]}, index=pd.MultiIndex.from_product((['a', 'b'], [0, 1]), names=['baz', None])) - index = Index(['a', 'b'], name='baz') - concatted_named_from_keys = pd.concat([df, df], keys=index) - assert_frame_equal(concatted_named_from_keys, df_concatted) - index_no_name = ['a', 'b'] + assert_frame_equal(concatted_named_from_keys, expected_named) + + index_no_name = Index(['a', 'b'], name=None) concatted_named_from_names = pd.concat( [df, df], keys=index_no_name, names=['baz']) - assert_frame_equal(concatted_named_from_names, df_concatted) + assert_frame_equal(concatted_named_from_names, expected_named) + + concatted_unnamed = pd.concat([df, df], keys=index_no_name) + expected_unnamed = pd.DataFrame( + {'foo': [1, 2, 1, 2], 'bar': [0.1, 0.2, 0.1, 0.2]}, + index=pd.MultiIndex.from_product((['a', 'b'], [0, 1]), + names=[None, None])) + assert_frame_equal(concatted_unnamed, expected_unnamed) class TestDataFrameCombineFirst(tm.TestCase, TestData): From 350e724f1f5988e333a1abe94bd57f29f4b5da55 Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 30 Sep 2016 13:34:17 -0400 Subject: [PATCH 07/15] simplified logic --- pandas/tools/merge.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index ec75f06f5f1c1..de4de8e133317 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1361,18 +1361,18 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, objs = [obj for obj in objs if obj is not None] else: # #1649 - clean_keys_list = [] + clean_keys = [] clean_objs = [] for k, v in zip(keys, objs): if v is None: continue - clean_keys_list.append(k) + clean_keys.append(k) clean_objs.append(v) objs = clean_objs - clean_keys_index = Index(clean_keys_list) if isinstance(keys, Index): - clean_keys_index.name = keys.name - keys = clean_keys_index + keys = Index(clean_keys, name=keys.name) + else: + keys = clean_keys if len(objs) == 0: raise ValueError('All objects passed were None') From 789ecd495f38fe3a84b8c0c1007d9daa06aae7bf Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 30 Sep 2016 13:38:20 -0400 Subject: [PATCH 08/15] use _ensure_index --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index de4de8e133317..2b9bbedfbf05a 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1370,7 +1370,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs if isinstance(keys, Index): - keys = Index(clean_keys, name=keys.name) + keys = Index(_ensure_index(keys), name=keys.name) else: keys = clean_keys From 325611919dca3ce3d419ce52810430078131425c Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 30 Sep 2016 14:01:53 -0400 Subject: [PATCH 09/15] typo --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 2b9bbedfbf05a..71ce2cf4d542a 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1370,7 +1370,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs if isinstance(keys, Index): - keys = Index(_ensure_index(keys), name=keys.name) + keys = Index(_ensure_index(clean_keys), name=keys.name) else: keys = clean_keys From b54b081f34dd0287074395819014c4a8234c087a Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 30 Sep 2016 14:13:11 -0400 Subject: [PATCH 10/15] simplified logic --- pandas/tools/merge.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 71ce2cf4d542a..66fd7b6642088 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1369,10 +1369,9 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_keys.append(k) clean_objs.append(v) objs = clean_objs - if isinstance(keys, Index): - keys = Index(_ensure_index(clean_keys), name=keys.name) - else: - keys = clean_keys + name = getattr(keys, 'name', None) + keys = _ensure_index(clean_keys) + keys.name = name if len(objs) == 0: raise ValueError('All objects passed were None') From c51df19431096651eb0cf5675dc55cc74c31657a Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 30 Sep 2016 15:16:09 -0400 Subject: [PATCH 11/15] _ensure_index --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 66fd7b6642088..ad55ed9c46731 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1370,7 +1370,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs name = getattr(keys, 'name', None) - keys = _ensure_index(clean_keys) + keys = _ensure_index(Index(clean_keys)) keys.name = name if len(objs) == 0: From 44932cc3c762c043dcc9735b5a89ac1c35c6ff5d Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Fri, 7 Oct 2016 16:02:59 -0400 Subject: [PATCH 12/15] changed whatsnew entry to 0.19.1 --- doc/source/whatsnew/v0.19.0.txt | 1 - doc/source/whatsnew/v0.19.1.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 0d146afa5ee1f..60847469aa02c 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1563,4 +1563,3 @@ Bug Fixes - ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) -- Bug in ``concat`` where names of keys were not propagated to the resulting MultiIndex (:issue:`14252`) \ No newline at end of file diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 1c5f4915bb3a4..b237d095fab34 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -30,3 +30,4 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``concat`` where names of keys were not propagated to the resulting MultiIndex (:issue:`14252`) \ No newline at end of file From d8e2c170bcc8221741f111bb802be4cfe04a186b Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Sat, 8 Oct 2016 23:21:58 -0400 Subject: [PATCH 13/15] remove coercion to Index before _ensure_index --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index ad55ed9c46731..66fd7b6642088 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1370,7 +1370,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs name = getattr(keys, 'name', None) - keys = _ensure_index(Index(clean_keys)) + keys = _ensure_index(clean_keys) keys.name = name if len(objs) == 0: From 4a301f851efd63dec4709ae1f65013ff5dab6cfe Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Sun, 9 Oct 2016 07:20:24 -0400 Subject: [PATCH 14/15] put back in Index coercion --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 66fd7b6642088..ad55ed9c46731 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1370,7 +1370,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs name = getattr(keys, 'name', None) - keys = _ensure_index(clean_keys) + keys = _ensure_index(Index(clean_keys)) keys.name = name if len(objs) == 0: From cdc76f6f0e2fd5e307f3a335afc23385d1e8234f Mon Sep 17 00:00:00 2001 From: Ben Kandel Date: Sun, 9 Oct 2016 07:22:56 -0400 Subject: [PATCH 15/15] take out _ensure_index --- pandas/tools/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index ad55ed9c46731..dc8b1feef51cc 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1370,7 +1370,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, clean_objs.append(v) objs = clean_objs name = getattr(keys, 'name', None) - keys = _ensure_index(Index(clean_keys)) + keys = Index(clean_keys) keys.name = name if len(objs) == 0: