From 9138150ae7f68c14af927cb3ac937cf55f2fe36b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 22 Jun 2018 21:03:44 -0700 Subject: [PATCH 1/6] cache harder --- pandas/_libs/tslibs/offsets.pyx | 24 ++++++++++++++++++++++++ pandas/tseries/offsets.py | 18 ------------------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 63add06db17b4..1c506801e1e66 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -313,6 +313,30 @@ class _BaseOffset(object): def __setattr__(self, name, value): raise AttributeError("DateOffset objects are immutable.") + def __eq__(self, other): + if is_string_object(other): + from pandas.tseries.frequencies import to_offset + other = to_offset(other) + + try: + return self._params == other._params + except AttributeError: + # other is not a DateOffset object + return False + + return self._params == other._params + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._params) + + @property + def _params(self): + from pandas.errors import AbstractMethodError + raise AbstractMethodError(self) + @property def kwds(self): # for backwards-compatibility diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index a3f82c1a0902e..2c364a26c4632 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -322,24 +322,6 @@ def _repr_attrs(self): def name(self): return self.rule_code - def __eq__(self, other): - - if isinstance(other, compat.string_types): - from pandas.tseries.frequencies import to_offset - - other = to_offset(other) - - if not isinstance(other, DateOffset): - return False - - return self._params == other._params - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._params) - def __add__(self, other): if isinstance(other, (ABCDatetimeIndex, ABCSeries)): return other + self From 
f2fff9e3429e9072064cb298faac7d59d6f60c70 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Jun 2018 11:06:14 -0700 Subject: [PATCH 2/6] Move params implementation to cython --- pandas/_libs/tslibs/offsets.pyx | 13 ++++++++++--- pandas/tseries/offsets.py | 14 ++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 1c506801e1e66..9765d3a00717b 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -332,10 +332,17 @@ class _BaseOffset(object): def __hash__(self): return hash(self._params) - @property + @property # NB: non-cython subclasses override with cache_readonly def _params(self): - from pandas.errors import AbstractMethodError - raise AbstractMethodError(self) + all_paras = self.__dict__.copy() + if 'holidays' in all_paras and not all_paras['holidays']: + all_paras.pop('holidays') + exclude = ['kwds', 'name', 'calendar'] + attrs = [(k, v) for k, v in all_paras.items() + if (k not in exclude) and (k[0] != '_')] + attrs = sorted(set(attrs)) + params = tuple([str(self.__class__)] + attrs) + return params @property def kwds(self): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2c364a26c4632..9ab65f148cf9c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -182,6 +182,8 @@ def __add__(date): Since 0 is a bit weird, we suggest avoiding its use. """ + _params = cache_readony(BaseOffset._params.fget) + _use_relativedelta = False _adjust_dst = False _attributes = frozenset(['n', 'normalize'] + @@ -288,18 +290,6 @@ def isAnchored(self): # if there were a canonical docstring for what isAnchored means. 
return (self.n == 1) - @cache_readonly - def _params(self): - all_paras = self.__dict__.copy() - if 'holidays' in all_paras and not all_paras['holidays']: - all_paras.pop('holidays') - exclude = ['kwds', 'name', 'calendar'] - attrs = [(k, v) for k, v in all_paras.items() - if (k not in exclude) and (k[0] != '_')] - attrs = sorted(set(attrs)) - params = tuple([str(self.__class__)] + attrs) - return params - # TODO: Combine this with BusinessMixin version by defining a whitelisted # set of attributes on each object rather than the existing behavior of # iterating over internal ``__dict__`` From eb69dd381cf138b371dacf75dfce86c5c9ef56d0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Jun 2018 11:06:57 -0700 Subject: [PATCH 3/6] update whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 90fc579ae69e5..1450d63a8be25 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -130,7 +130,7 @@ Performance Improvements - Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`) - Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) -- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`) +- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`) - .. 
_whatsnew_0240.docs: From 04c8e895a2edbe23f8717361fe1dd836e7880e36 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Jun 2018 11:18:38 -0700 Subject: [PATCH 4/6] typo fixup --- pandas/tseries/offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 9ab65f148cf9c..32bedb4d41950 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -182,7 +182,7 @@ def __add__(date): Since 0 is a bit weird, we suggest avoiding its use. """ - _params = cache_readony(BaseOffset._params.fget) + _params = cache_readonly(BaseOffset._params.fget) _use_relativedelta = False _adjust_dst = False From 7c9b121b60b48c1f82f81677effc2a61c9ec1da5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 26 Jun 2018 10:41:01 -0700 Subject: [PATCH 5/6] comment, whitespace --- pandas/_libs/tslibs/offsets.pyx | 7 ++++++- pandas/tseries/offsets.py | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 9765d3a00717b..083b6b04f27ca 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -332,8 +332,13 @@ class _BaseOffset(object): def __hash__(self): return hash(self._params) - @property # NB: non-cython subclasses override with cache_readonly + @property def _params(self): + """ + Returns a tuple containing all of the attributes needed to evaluate + equality between two DateOffset objects. + """ + # NB: non-cython subclasses override property with cache_readonly all_paras = self.__dict__.copy() if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 32bedb4d41950..1cfd3f476f8ab 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -183,7 +183,6 @@ def __add__(date): Since 0 is a bit weird, we suggest avoiding its use. 
""" _params = cache_readonly(BaseOffset._params.fget) - _use_relativedelta = False _adjust_dst = False _attributes = frozenset(['n', 'normalize'] + From 92e6fdd60a5a48ba0f5d23bb7a706c7f65ba0a93 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 26 Jun 2018 10:44:05 -0700 Subject: [PATCH 6/6] implement to_offset --- pandas/_libs/tslibs/offsets.pyx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 083b6b04f27ca..b4b27b99bdb30 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -88,6 +88,15 @@ for _d in DAYS: # --------------------------------------------------------------------- # Misc Helpers +cdef to_offset(object obj): + """ + Wrap pandas.tseries.frequencies.to_offset to centralize runtime + imports + """ + from pandas.tseries.frequencies import to_offset + return to_offset(obj) + + def as_datetime(obj): f = getattr(obj, 'to_pydatetime', None) if f is not None: @@ -315,7 +324,6 @@ class _BaseOffset(object): def __eq__(self, other): if is_string_object(other): - from pandas.tseries.frequencies import to_offset other = to_offset(other) try: