From 63bbbdeef3efa6791d9fe11167276f42a478debb Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 30 Sep 2017 14:25:01 +0200 Subject: [PATCH 1/3] Added repr string for Grouper and TimeGrouper --- pandas/core/groupby.py | 13 +++++++++++++ pandas/core/resample.py | 33 ++++++++++++++++++++++++++++++++- pandas/tests/test_resample.py | 8 ++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 967685c4e11bf..cbe2e9f1cca9c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -233,6 +233,10 @@ class Grouper(object): >>> df.groupby(Grouper(level='date', freq='60s', axis=1)) """ + _attributes = collections.OrderedDict((('key', None), ('level', None), + ('freq', None), ('axis', 0), + ('sort', False) + )) def __new__(cls, *args, **kwargs): if kwargs.get('freq') is not None: @@ -333,6 +337,15 @@ def _set_grouper(self, obj, sort=False): def groups(self): return self.grouper.groups + def __repr__(self): + defaults = self._attributes + sd = self.__dict__ + attrs = collections.OrderedDict((k, sd[k]) for k, v in defaults.items() + if k in sd and sd[k] != v) + attrs = ", ".join("{}={!r}".format(k, v) for k, v in attrs.items()) + cls_name = self.__class__.__name__ + return "{}({})".format(cls_name, attrs) + class GroupByPlot(PandasObject): """ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 5a571f9077999..ed6f84e60c4a5 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1,4 +1,5 @@ from datetime import timedelta +import collections import numpy as np import warnings import copy @@ -1025,6 +1026,16 @@ class TimeGrouper(Grouper): Use begin, end, nperiods to generate intervals that cannot be derived directly from the associated object """ + # _attributes is used in __repr__below + _attributes = Grouper._attributes.copy() + _attributes.update((('freq', 'Min'), ('closed', None), ('label', None), + ('how', 'mean'), ('nperiods', None), ('axis', 0), + ('fill_method', None), ('limit', None), + ('loffset', None), ('kind', None), + ('convention', None), ('base', 0), + ('convention', 'e'), ('sort', True), + )) + _end_types = {'M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'} def __init__(self, freq='Min', closed=None, label=None, how='mean', nperiods=None, axis=0, @@ -1032,7 +1043,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', convention=None, base=0, **kwargs): freq = to_offset(freq) - end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W']) + end_types = self._end_types rule = freq.rule_code if (rule in end_types or ('-' in rule and rule[:rule.find('-')] in end_types)): @@ -1047,6 +1058,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', label = 'left' self.closed = closed + self.freq = freq self.label = label self.nperiods = nperiods self.kind = kind @@ -1290,6 +1302,25 @@ def _get_period_bins(self, ax): return binner, bins, labels + def __repr__(self): + defaults = self._attributes.copy() + end_types = self._end_types + rule = self.freq.rule_code + if (rule in end_types or + ('-' in rule and rule[:rule.find('-')] in end_types)): + defaults.update(closed='right', label='right') + else: + defaults.update(closed='left', label='left') + + sd = self.__dict__ + attrs = collections.OrderedDict((k, sd[k]) for k, v in defaults.items() + if k in sd and sd[k] != v) + if 'freq' in attrs: + attrs['freq'] = attrs['freq'].freqstr + attrs = ", ".join("{}={!r}".format(k, v) for k, v in attrs.items()) + cls_name = self.__class__.__name__ + return "{}({})".format(cls_name, attrs) + def _take_new_index(obj, indexer, new_index, axis=0): from pandas.core.api import Series, DataFrame diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index ba1a2ad1f42e2..9ae79f6014301 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -3177,6 +3177,14 @@ def setup_method(self, method): self.ts = Series(np.random.randn(1000), index=date_range('1/1/2000', periods=1000)) + def test_timegrouper_repr(self): + # Added in GH17727 + result = repr(TimeGrouper(key='key', freq='50Min', label='right')) + cls_name_result, attrib_result = result.split('(') + attrib_result = set(attrib_result.rstrip(')').split(', ')) + assert cls_name_result == 'TimeGrouper' + assert attrib_result == {"key='key'", "freq='50T'", "label='right'"} + def test_apply(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): From 5595156c554f7c7477fac0d1a97a0cd04843daa9 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 3 Oct 2017 06:52:38 +0200 Subject: [PATCH 2/3] Changed _attributes to use signature + changed tests to have ordered arguments for TimeGrouper --- pandas/core/groupby.py | 15 ++++++++------- pandas/core/resample.py | 18 ++++++++---------- pandas/tests/test_resample.py | 6 ++---- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index cbe2e9f1cca9c..e1a04a2a96be0 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -9,7 +9,7 @@ from pandas.compat import ( zip, range, lzip, - callable, map + callable, map, signature ) from pandas import compat @@ -233,10 +233,6 @@ class Grouper(object): >>> df.groupby(Grouper(level='date', freq='60s', axis=1)) """ - _attributes = collections.OrderedDict((('key', None), ('level', None), - ('freq', None), ('axis', 0), - ('sort', False) - )) def __new__(cls, *args, **kwargs): if kwargs.get('freq') is not None: @@ -256,6 +252,11 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False): self.indexer = None self.binner = None + # _attributes is used in __repr__below + _attributes = collections.OrderedDict( + (k, v) for k, v in zip(signature(__init__).args[1:], + signature(__init__).defaults)) + @property def ax(self): return self.grouper @@ -338,9 +339,9 @@ def groups(self): return self.grouper.groups def __repr__(self): - defaults = self._attributes sd = self.__dict__ - attrs = collections.OrderedDict((k, sd[k]) for k, v in defaults.items() + attr = self._attributes + attrs = collections.OrderedDict((k, sd[k]) for k, v in attr.items() if k in sd and sd[k] != v) attrs = ", ".join("{}={!r}".format(k, v) for k, v in attrs.items()) cls_name = self.__class__.__name__ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index ed6f84e60c4a5..01ff13dd1f81f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1026,16 +1026,6 @@ class TimeGrouper(Grouper): Use begin, end, nperiods to generate intervals that cannot be derived directly from the associated object """ - # _attributes is used in __repr__below - _attributes = Grouper._attributes.copy() - _attributes.update((('freq', 'Min'), ('closed', None), ('label', None), - ('how', 'mean'), ('nperiods', None), ('axis', 0), - ('fill_method', None), ('limit', None), - ('loffset', None), ('kind', None), - ('convention', None), ('base', 0), - ('convention', 'e'), ('sort', True), - )) - _end_types = {'M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'} def __init__(self, freq='Min', closed=None, label=None, how='mean', nperiods=None, axis=0, @@ -1080,6 +1070,14 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs) + # _attributes is used in __repr__below + _attributes = Grouper._attributes.copy() + _attributes.update( + (k, v) for k, v in zip(compat.signature(__init__).args[1:], + compat.signature(__init__).defaults)) + _attributes.update(sort=True, convention='e') + _end_types = {'M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'} + def _get_resampler(self, obj, kind=None): """ return my resampler or raise if we have an invalid axis diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 9ae79f6014301..397674c829eea 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -3180,10 +3180,8 @@ def setup_method(self, method): def test_timegrouper_repr(self): # Added in GH17727 result = repr(TimeGrouper(key='key', freq='50Min', label='right')) - cls_name_result, attrib_result = result.split('(') - attrib_result = set(attrib_result.rstrip(')').split(', ')) - assert cls_name_result == 'TimeGrouper' - assert attrib_result == {"key='key'", "freq='50T'", "label='right'"} + expected = "TimeGrouper(key='key', freq='50T', label='right')" + assert result == expected def test_apply(self): with tm.assert_produces_warning(FutureWarning, From 62f2de86b8e72acdd6d76183a6150d7c3dba8cc9 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 4 Oct 2017 20:04:04 +0200 Subject: [PATCH 3/3] New attempt at Grouper.__repr__ --- pandas/core/groupby.py | 15 +++++++-------- pandas/core/resample.py | 32 +++++--------------------------- pandas/tests/test_resample.py | 4 +++- 3 files changed, 15 insertions(+), 36 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index e1a04a2a96be0..f9ac0e67fcf05 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -234,6 +234,8 @@ class Grouper(object): >>> df.groupby(Grouper(level='date', freq='60s', axis=1)) """ + _attributes = ['key', 'level', 'freq', 'axis', 'sort'] + def __new__(cls, *args, **kwargs): if kwargs.get('freq') is not None: from pandas.core.resample import TimeGrouper @@ -252,11 +254,6 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False): self.indexer = None self.binner = None - # _attributes is used in __repr__below - _attributes = collections.OrderedDict( - (k, v) for k, v in zip(signature(__init__).args[1:], - signature(__init__).defaults)) - @property def ax(self): return self.grouper @@ -339,10 +336,12 @@ def groups(self): return self.grouper.groups def __repr__(self): + grouper_defaults = compat.signature(self.__init__).defaults sd = self.__dict__ - attr = self._attributes - attrs = collections.OrderedDict((k, sd[k]) for k, v in attr.items() - if k in sd and sd[k] != v) + attrs = collections.OrderedDict() + for k, v in zip(self._attributes, grouper_defaults): + if k in sd and sd[k] != v: + attrs[k] = sd[k] attrs = ", ".join("{}={!r}".format(k, v) for k, v in attrs.items()) cls_name = self.__class__.__name__ return "{}({})".format(cls_name, attrs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 01ff13dd1f81f..56402ea979255 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1027,6 +1027,11 @@ class TimeGrouper(Grouper): directly from the associated object """ + _attributes = ['key', 'level', 'freq', 'axis', 'sort', 'closed', 'label', + 'how', 'nperiods', 'fill_method', 'limit', + 'loffset', 'kind', 'convention', 'base'] + _end_types = {'M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'} + def __init__(self, freq='Min', closed=None, label=None, how='mean', nperiods=None, axis=0, fill_method=None, limit=None, loffset=None, kind=None, @@ -1070,14 +1075,6 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs) - # _attributes is used in __repr__below - _attributes = Grouper._attributes.copy() - _attributes.update( - (k, v) for k, v in zip(compat.signature(__init__).args[1:], - compat.signature(__init__).defaults)) - _attributes.update(sort=True, convention='e') - _end_types = {'M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'} - def _get_resampler(self, obj, kind=None): """ return my resampler or raise if we have an invalid axis @@ -1300,25 +1297,6 @@ def _get_period_bins(self, ax): return binner, bins, labels - def __repr__(self): - defaults = self._attributes.copy() - end_types = self._end_types - rule = self.freq.rule_code - if (rule in end_types or - ('-' in rule and rule[:rule.find('-')] in end_types)): - defaults.update(closed='right', label='right') - else: - defaults.update(closed='left', label='left') - - sd = self.__dict__ - attrs = collections.OrderedDict((k, sd[k]) for k, v in defaults.items() - if k in sd and sd[k] != v) - if 'freq' in attrs: - attrs['freq'] = attrs['freq'].freqstr - attrs = ", ".join("{}={!r}".format(k, v) for k, v in attrs.items()) - cls_name = self.__class__.__name__ - return "{}({})".format(cls_name, attrs) - def _take_new_index(obj, indexer, new_index, axis=0): from pandas.core.api import Series, DataFrame diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 397674c829eea..ff012c75e1531 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -3180,7 +3180,9 @@ def setup_method(self, method): def test_timegrouper_repr(self): # Added in GH17727 result = repr(TimeGrouper(key='key', freq='50Min', label='right')) - expected = "TimeGrouper(key='key', freq='50T', label='right')" + expected = ("TimeGrouper(key='key', freq=<50 * Minutes>, axis=0," + " sort=True, closed='left', label='right', how='mean', " + "loffset=None)") assert result == expected def test_apply(self):