diff --git a/doc/source/release.rst b/doc/source/release.rst
index e37a7c7eab861..0e5280f1c5306 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -365,6 +365,7 @@ Bug Fixes
   would only replace the first occurrence of a value (:issue:`6689`)
 - Better error message when passing a frequency of 'MS' in ``Period`` construction (GH5332)
 - Bug in `Series.__unicode__` when `max_rows` is `None` and the Series has more than 1000 rows. (:issue:`6863`)
+- Bug in ``groupby.get_group`` where a datelike wasn't always accepted (:issue:`5267`)
 
 pandas 0.13.1
 -------------
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 033cdf5a81318..a32b25312d4ba 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1,9 +1,11 @@
 import types
 from functools import wraps
 import numpy as np
+import datetime
 
 from pandas.compat import(
-    zip, builtins, range, long, lrange, lzip, OrderedDict, callable
+    zip, builtins, range, long, lrange, lzip,
+    OrderedDict, callable
 )
 from pandas import compat
 
@@ -402,14 +404,32 @@ def indices(self):
         return self.grouper.indices
 
     def _get_index(self, name):
-        """ safe get index """
-        try:
-            return self.indices[name]
-        except:
-            if isinstance(name, Timestamp):
-                name = name.value
-                return self.indices[name]
-            raise
+        """ safe get index, translate keys for datelike to underlying repr """
+
+        def convert(key, s):
+            # possibly convert to the actual key types
+            # in the indices, could be a Timestamp or a np.datetime64
+
+            if isinstance(s, (Timestamp,datetime.datetime)):
+                return Timestamp(key)
+            elif isinstance(s, np.datetime64):
+                return Timestamp(key).asm8
+            return key
+
+        sample = list(self.indices)[0]
+        if isinstance(sample, tuple):
+            if not isinstance(name, tuple):
+                raise ValueError("must supply a tuple to get_group with multiple grouping keys")
+            if not len(name) == len(sample):
+                raise ValueError("must supply a same-length tuple to get_group with multiple grouping keys")
+
+            name = tuple([ convert(n, k) for n, k in zip(name,sample) ])
+
+        else:
+
+            name = convert(name, sample)
+
+        return self.indices[name]
 
     @property
     def name(self):
@@ -554,7 +574,7 @@ def apply(self, func, *args, **kwargs):
         path. This can lead to unexpected behavior if func has
         side-effects, as they will take effect twice for the first
         group.
 
-        
+
         See also
         --------
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 540ce1cc61929..a7f7223172848 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -17,7 +17,8 @@
 
     assert_series_equal, assert_almost_equal, assert_index_equal)
 from pandas.compat import(
-    range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict
+    range, long, lrange, StringIO, lmap, lzip, map,
+    zip, builtins, OrderedDict
 )
 from pandas import compat
 from pandas.core.panel import Panel
@@ -479,6 +480,36 @@ def test_get_group(self):
         expected = wp.reindex(major=[x for x in wp.major_axis if x.month == 1])
         assert_panel_equal(gp, expected)
 
+
+        # GH 5267
+        # be datelike friendly
+        df = DataFrame({'DATE' : pd.to_datetime(['10-Oct-2013', '10-Oct-2013', '10-Oct-2013',
+                                                 '11-Oct-2013', '11-Oct-2013', '11-Oct-2013']),
+                        'label' : ['foo','foo','bar','foo','foo','bar'],
+                        'VAL' : [1,2,3,4,5,6]})
+
+        g = df.groupby('DATE')
+        key = list(g.groups)[0]
+        result1 = g.get_group(key)
+        result2 = g.get_group(Timestamp(key).to_datetime())
+        result3 = g.get_group(str(Timestamp(key)))
+        assert_frame_equal(result1,result2)
+        assert_frame_equal(result1,result3)
+
+        g = df.groupby(['DATE','label'])
+
+        key = list(g.groups)[0]
+        result1 = g.get_group(key)
+        result2 = g.get_group((Timestamp(key[0]).to_datetime(),key[1]))
+        result3 = g.get_group((str(Timestamp(key[0])),key[1]))
+        assert_frame_equal(result1,result2)
+        assert_frame_equal(result1,result3)
+
+        # must pass a same-length tuple with multiple keys
+        self.assertRaises(ValueError, lambda : g.get_group('foo'))
+        self.assertRaises(ValueError, lambda : g.get_group(('foo')))
+        self.assertRaises(ValueError, lambda : g.get_group(('foo','bar','baz')))
+
     def test_agg_apply_corner(self):
         # nothing to group, all NA
         grouped = self.ts.groupby(self.ts * np.nan)