diff --git a/doc/source/release.rst b/doc/source/release.rst
index 14ed2cab10eac..4d810e834b2a8 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -217,6 +217,7 @@ See :ref:`Internal Refactoring`
     of a duplicate index (:issue:`4359`)
   - In ``to_json``, fix date handling so milliseconds are the default timestamp
     as the docstring says (:issue:`4362`).
+  - ``as_index`` is no longer ignored when doing groupby apply (:issue:`4648`), (:issue:`3417`)
   - JSON NaT handling fixed, NaTs are now serialised to `null` (:issue:`4498`)
   - Fixed JSON handling of escapable characters in JSON object keys (:issue:`4593`)
   - Fixed passing ``keep_default_na=False`` when ``na_values=None`` (:issue:`4318`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 1f15f1a8ae10d..f0ba0c3b54f4a 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -516,7 +516,7 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
                 result = result.reindex(ax)
             else:
                 result = result.reindex_axis(ax, axis=self.axis)
-        elif self.group_keys:
+        elif self.group_keys and self.as_index:
             group_keys = keys
             group_levels = self.grouper.levels
             group_names = self.grouper.names
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 9e7cdf9df2c6b..4bd44fcf26bb3 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -12,7 +12,8 @@
 from pandas.core.groupby import GroupByError, SpecificationError, DataError
 from pandas.core.series import Series
 from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
-                                 assert_series_equal, assert_almost_equal)
+                                 assert_series_equal, assert_almost_equal,
+                                 assert_index_equal)
 from pandas.compat import(
     range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict
 )
@@ -1178,6 +1179,38 @@ def test_groupby_as_index_corner(self):
         self.assertRaises(ValueError, self.df.groupby,
                           lambda x: x.lower(), as_index=False, axis=1)
 
+    def test_groupby_as_index_apply(self):
+        # GH #4648 and #3417
+        df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'],
+                        'user_id': [1,2,1,1,3,1],
+                        'time': range(6)})
+
+        g_as = df.groupby('user_id', as_index=True)
+        g_not_as = df.groupby('user_id', as_index=False)
+
+        # as_index=True: expect (user_id, original row label) pairs
+        res_as = g_as.head(2).index
+        exp_as = MultiIndex.from_tuples([(1, 0), (1, 2), (2, 1), (3, 4)])
+        assert_index_equal(res_as, exp_as)
+
+        # as_index=False: expect only the original row labels
+        res_not_as = g_not_as.head(2).index
+        exp_not_as = Index([0, 2, 1, 4])
+        assert_index_equal(res_not_as, exp_not_as)
+
+        # apply with as_index=True is checked against the as_index=True expectation
+        res_as = g_as.apply(lambda x: x.head(2)).index
+        assert_index_equal(res_as, exp_as)
+
+        res_not_as = g_not_as.apply(lambda x: x.head(2)).index
+        assert_index_equal(res_not_as, exp_not_as)
+
+        # identity apply with as_index=False: expect the frame's own index back
+        ind = Index(list('abcde'))
+        df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind)
+        res = df.groupby(0, as_index=False).apply(lambda x: x).index
+        assert_index_equal(res, ind)
+
     def test_groupby_multiple_key(self):
         df = tm.makeTimeDataFrame()
         grouped = df.groupby([lambda x: x.year,