From 2800fa0e5236920ebb9c3cae46f871a2e2f76d04 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <thomas-augspurger@uiowa.edu>
Date: Sun, 4 May 2014 16:24:16 -0500
Subject: [PATCH 1/4] add the grouping code

---
 pandas/core/groupby.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index ce64ed754180d..497902224a19d 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1917,8 +1917,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
     any_callable = any(callable(g) or isinstance(g, dict) for g in keys)
     any_arraylike = any(isinstance(g, (list, tuple, Series, np.ndarray))
                         for g in keys)
+    # sugar for df.reset_index().groupby(['a', 'b']) where b was in index_names
+    from_col, from_idx, from_both = _from_index_and_columns(obj, keys)
 
     try:
+        if from_idx and from_col:
+            to_exclude = set(obj.index.names) - from_idx
+            obj = obj.reset_index()
+            group_axis = obj._get_axis(axis)
         if isinstance(obj, DataFrame):
             all_in_columns = all(g in obj.columns for g in keys)
         else:
@@ -1940,6 +1946,12 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
 
     groupings = []
     exclusions = []
+
+    if from_col and from_idx:
+        # don't include those just there becaues of the reset_index
+        if to_exclude:
+            exclusions += list(to_exclude)
+
     for i, (gpr, level) in enumerate(zip(keys, levels)):
         name = None
         try:
@@ -1969,6 +1981,29 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
     return grouper, exclusions, obj
 
 
+def _from_index_and_columns(obj, keys):
+    """
+    keys is already listlike
+    """
+    if not all(isinstance(g, compat.string_types) for g in keys):
+        # TODO: Handle mix of callables and strings.
+        return None, None, None
+    ks = set(keys)
+    from_idx = ks & set(obj.index.names)
+    from_col = ks & set(obj.columns)
+
+    # check for ambiguity:
+    from_both = from_idx & from_col
+    if from_both:
+        from warnings import warn
+        msg = ("Found {0} in both the columns and index labels. "
+               "Grouping by the columns".format(from_both))
+        warn(msg)
+
+    # don't need to do anything if the only ones from either are in both
+    return from_col, from_idx - from_both, from_both
+
+
 def _is_label_like(val):
     return isinstance(val, compat.string_types) or np.isscalar(val)
 

From 45dc31b78b01813f0b0eaf66c4a8920f3ea62482 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <thomas-augspurger@uiowa.edu>
Date: Sun, 4 May 2014 16:40:29 -0500
Subject: [PATCH 2/4] add tests

---
 pandas/core/groupby.py       |  4 +++-
 pandas/tests/test_groupby.py | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 497902224a19d..2d14262ce6e02 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1985,7 +1985,9 @@ def _from_index_and_columns(obj, keys):
     """
     keys is already listlike
     """
-    if not all(isinstance(g, compat.string_types) for g in keys):
+    not_all_string = not all(isinstance(g, compat.string_types) for g in keys)
+    not_df = not isinstance(obj, DataFrame)
+    if not_all_string or not_df:
         # TODO: Handle mix of callables and strings.
         return None, None, None
     ks = set(keys)
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 1b70ae0309b10..18c4aaf8bc76f 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -11,7 +11,8 @@
 from pandas.core.common import rands
 from pandas.core.api import Categorical, DataFrame
 from pandas.core.groupby import (SpecificationError, DataError,
-                                 _nargsort, _lexsort_indexer)
+                                 _nargsort, _lexsort_indexer,
+                                 _from_index_and_columns)
 from pandas.core.series import Series
 from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
                                  assert_series_equal, assert_almost_equal,
@@ -4168,6 +4169,34 @@ def test_nargsort(self):
         expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
         assert_equal(result, expected)
 
+    def test_from_index_and_columns(self):
+        # allowing by to spread across index and col names GH #5677
+        df = DataFrame([[1, 2, 3, 4]], columns=['c1', 'c2', 'i1', 'i2'])
+        df = df.set_index(['i1', 'i2'])
+
+        keys = ['c1']
+        from_col, from_idx, from_both = _from_index_and_columns(df, keys)
+        self.assertEqual(from_col, set(['c1']))
+        self.assertEqual(from_idx, set([]))
+        self.assertEqual(from_both, set([]))
+
+        keys = ['c1', 'i1']
+        from_col, from_idx, from_both = _from_index_and_columns(df, keys)
+        self.assertEqual(from_col, set(['c1']))
+        self.assertEqual(from_idx, set(['i1']))
+        self.assertEqual(from_both, set([]))
+
+        df.index.names = ['i1', 'c1']
+        keys = ['c1', 'i1']
+        with tm.assert_produces_warning(UserWarning):
+            from_col, from_idx, from_both = _from_index_and_columns(df, keys)
+        self.assertEqual(from_col, set(['c1']))
+        self.assertEqual(from_idx, set(['i1']))
+        self.assertEqual(from_both, set(['c1']))
+
+        res = _from_index_and_columns(df['c1'], 'i1')
+        self.assertEqual(res, (None, None, None))
+
 def assert_fp_equal(a, b):
     assert (np.abs(a - b) < 1e-12).all()
 

From 8a79ea150318b816d0bfd3fde95cb8a5fdd2fd7d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <thomas-augspurger@uiowa.edu>
Date: Sun, 4 May 2014 18:47:30 -0500
Subject: [PATCH 3/4] refactor index resetting

---
 pandas/core/groupby.py       | 10 +++-------
 pandas/tests/test_groupby.py | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 2d14262ce6e02..7f377d7cb425a 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1922,9 +1922,10 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
 
     try:
         if from_idx and from_col:
-            to_exclude = set(obj.index.names) - from_idx
-            obj = obj.reset_index()
+            # move keyed index levels to columns; TODO: check the drop part
+            obj = obj.reset_index(level=list(from_idx)).reset_index(drop=True)
             group_axis = obj._get_axis(axis)
+
         if isinstance(obj, DataFrame):
             all_in_columns = all(g in obj.columns for g in keys)
         else:
@@ -1947,11 +1948,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
     groupings = []
     exclusions = []
 
-    if from_col and from_idx:
-        # don't include those just there becaues of the reset_index
-        if to_exclude:
-            exclusions += list(to_exclude)
-
     for i, (gpr, level) in enumerate(zip(keys, levels)):
         name = None
         try:
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 18c4aaf8bc76f..2fd35734e0265 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -4169,6 +4169,21 @@ def test_nargsort(self):
         expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
         assert_equal(result, expected)
 
+    def test_by_index_cols(self):
+        df = DataFrame([[1, 2, 'x', 'a', 'a'],
+                        [1, 3, 'x', 'a', 'b'],
+                        [1, 4, 'x', 'b', 'a'],
+                        [1, 5, 'y', 'b', 'b']],
+                       columns=['c1', 'c2', 'g1', 'i1', 'i2'])
+        df = df.set_index(['i1', 'i2'])
+        df.index.names = ['i1', 'g1']
+        result = df.groupby(by=['g1', 'i1']).mean()
+        idx = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'b')],
+                                     names=['g1', 'i1'])
+        expected = DataFrame([[1, 2.5], [1, 4], [1, 5]],
+                             index=idx, columns=['c1', 'c2'])
+        assert_frame_equal(result, expected)
+
     def test_from_index_and_columns(self):
         # allowing by to spread across index and col names GH #5677
         df = DataFrame([[1, 2, 3, 4]], columns=['c1', 'c2', 'i1', 'i2'])

From 6be446b9d6a73d675998a72273e5ebcab0086fe9 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <thomas-augspurger@uiowa.edu>
Date: Mon, 5 May 2014 06:38:06 -0500
Subject: [PATCH 4/4] add test [ci skip]

---
 pandas/core/groupby.py       |  4 ++--
 pandas/tests/test_groupby.py | 19 +++++++++++++------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 7f377d7cb425a..9047f1504aa1a 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1995,8 +1995,8 @@ def _from_index_and_columns(obj, keys):
     if from_both:
         from warnings import warn
         msg = ("Found {0} in both the columns and index labels. "
-               "Grouping by the columns".format(from_both))
-        warn(msg)
+               "Grouping by the columns".format(from_both))
+        warn(msg, FutureWarning)
 
     # don't need to do anything if the only ones from either are in both
     return from_col, from_idx - from_both, from_both
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 2fd35734e0265..ae06e60724264 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -4171,19 +4171,26 @@ def test_nargsort(self):
 
     def test_by_index_cols(self):
         df = DataFrame([[1, 2, 'x', 'a', 'a'],
-                        [1, 3, 'x', 'a', 'b'],
-                        [1, 4, 'x', 'b', 'a'],
-                        [1, 5, 'y', 'b', 'b']],
+                        [2, 3, 'x', 'a', 'b'],
+                        [3, 4, 'x', 'b', 'a'],
+                        [4, 5, 'y', 'b', 'b']],
                        columns=['c1', 'c2', 'g1', 'i1', 'i2'])
         df = df.set_index(['i1', 'i2'])
-        df.index.names = ['i1', 'g1']
+        df.index.set_names(['i1', 'g1'], inplace=True)
         result = df.groupby(by=['g1', 'i1']).mean()
         idx = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'b')],
                                      names=['g1', 'i1'])
-        expected = DataFrame([[1, 2.5], [1, 4], [1, 5]],
+        expected = DataFrame([[1.5, 2.5], [3, 4], [4, 5]],
                              index=idx, columns=['c1', 'c2'])
         assert_frame_equal(result, expected)
 
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.groupby('g1').mean()
+        expected = DataFrame([[2., 3.], [4., 5.]],
+                             index=['x', 'y'], columns=['c1', 'c2'])
+        expected.index.set_names(['g1'], inplace=True)
+        assert_frame_equal(result, expected)
+
     def test_from_index_and_columns(self):
         # allowing by to spread across index and col names GH #5677
         df = DataFrame([[1, 2, 3, 4]], columns=['c1', 'c2', 'i1', 'i2'])
@@ -4203,7 +4210,7 @@ def test_from_index_and_columns(self):
 
         df.index.names = ['i1', 'c1']
         keys = ['c1', 'i1']
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(FutureWarning):
             from_col, from_idx, from_both = _from_index_and_columns(df, keys)
         self.assertEqual(from_col, set(['c1']))
         self.assertEqual(from_idx, set(['i1']))