pandas-dev · TomAugspurger · May 4, 2014 · May 4, 2014 · May 4, 2014 · May 5, 2014
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1917,8 +1917,15 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
     any_callable = any(callable(g) or isinstance(g, dict) for g in keys)
     any_arraylike = any(isinstance(g, (list, tuple, Series, np.ndarray))
                         for g in keys)
+    # sugar for df.reset_index().groupby(['a', 'b']) where b was in index_names
+    from_col, from_idx, from_both = _from_index_and_columns(obj, keys)
 
     try:
+        if from_idx and from_col:
+            # TODO: confirm that dropping the remaining index levels is intended
+            obj = obj.reset_index(level=list(from_idx)).reset_index(drop=True)
+            group_axis = obj._get_axis(axis)
+
         if isinstance(obj, DataFrame):
             all_in_columns = all(g in obj.columns for g in keys)
         else:
@@ -1940,6 +1947,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
 
     groupings = []
     exclusions = []
+
     for i, (gpr, level) in enumerate(zip(keys, levels)):
         name = None
         try:
@@ -1969,6 +1977,31 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
     return grouper, exclusions, obj
 
 
+def _from_index_and_columns(obj, keys):
+    """
+    Split string group keys (``keys`` is already list-like) by origin.
+
+    Returns sets ``(from_col, from_idx, from_both)``: keys that are column
+    labels, keys that are index level names only, and keys that are both
+    (warned about; they also stay in ``from_col``).  Returns three Nones
+    if ``obj`` is not a DataFrame or some key is not a string.
+    """
+    all_string = all(isinstance(g, compat.string_types) for g in keys)
+    if not (all_string and isinstance(obj, DataFrame)):
+        # TODO: Handle mix of callables and strings.
+        return None, None, None
+    ks = set(keys)
+    from_idx = ks & set(obj.index.names)
+    from_col = ks & set(obj.columns)
+    from_both = from_idx & from_col  # ambiguous: both a column and a level
+    if from_both:
+        from warnings import warn
+        warn("Found {0} in both the columns and index labels. "
+             "Grouping by the columns".format(from_both), FutureWarning)
+    # index-only keys exclude ambiguous ones, which resolve to the columns
+    return from_col, from_idx - from_both, from_both
+
+
 def _is_label_like(val):
     return isinstance(val, compat.string_types) or np.isscalar(val)
 

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -11,7 +11,8 @@
 from pandas.core.common import rands
 from pandas.core.api import Categorical, DataFrame
 from pandas.core.groupby import (SpecificationError, DataError,
-                                 _nargsort, _lexsort_indexer)
+                                 _nargsort, _lexsort_indexer,
+                                 _from_index_and_columns)
 from pandas.core.series import Series
 from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
                                  assert_series_equal, assert_almost_equal,
@@ -4168,6 +4169,56 @@ def test_nargsort(self):
         expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
         assert_equal(result, expected)
 
+    def test_by_index_cols(self):
+        # group by a mix of column labels and index level names
+        df = DataFrame([[1, 2, 'x', 'a', 'a'], [2, 3, 'x', 'a', 'b'],
+                        [3, 4, 'x', 'b', 'a'], [4, 5, 'y', 'b', 'b']],
+                       columns=['c1', 'c2', 'g1', 'i1', 'i2'])
+        df = df.set_index(['i1', 'i2'])
+        df.index.set_names(['i1', 'g1'], inplace=True)  # 'g1' now ambiguous
+        result = df.groupby(by=['g1', 'i1']).mean()
+        idx = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'b')],
+                                     names=['g1', 'i1'])
+        # column 'g1' wins; one-row groups keep their row's c1/c2 values
+        expected = DataFrame([[1.5, 2.5], [3, 4], [4, 5]],
+                             index=idx, columns=['c1', 'c2'])
+        assert_frame_equal(result, expected)
+        # the ambiguous key on its own groups by the column and warns
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.groupby('g1').mean()
+        expected = DataFrame([[2., 3.], [4., 5.]],
+                             index=['x', 'y'], columns=['c1', 'c2'])
+        expected.index.set_names(['g1'], inplace=True)
+        assert_frame_equal(result, expected)
+
+    def test_from_index_and_columns(self):
+        # allowing by to spread across index and col names GH #5677
+        df = DataFrame([[1, 2, 3, 4]], columns=['c1', 'c2', 'i1', 'i2'])
+        df = df.set_index(['i1', 'i2'])
+
+        keys = ['c1']  # a column label only
+        from_col, from_idx, from_both = _from_index_and_columns(df, keys)
+        self.assertEqual(from_col, set(['c1']))
+        self.assertEqual(from_idx, set([]))
+        self.assertEqual(from_both, set([]))
+
+        keys = ['c1', 'i1']  # one column label, one index level name
+        from_col, from_idx, from_both = _from_index_and_columns(df, keys)
+        self.assertEqual(from_col, set(['c1']))
+        self.assertEqual(from_idx, set(['i1']))
+        self.assertEqual(from_both, set([]))
+
+        df.index.names = ['i1', 'c1']  # 'c1' is now a column and a level
+        keys = ['c1', 'i1']
+        with tm.assert_produces_warning(FutureWarning):
+            from_col, from_idx, from_both = _from_index_and_columns(df, keys)
+        self.assertEqual(from_col, set(['c1']))
+        self.assertEqual(from_idx, set(['i1']))
+        self.assertEqual(from_both, set(['c1']))
+        # a non-DataFrame short-circuits; keys must still be list-like
+        res = _from_index_and_columns(df['c1'], ['i1'])
+        self.assertEqual(res, (None, None, None))
+
 def assert_fp_equal(a, b):
     assert (np.abs(a - b) < 1e-12).all()