From 3a3e4fcbe617538530c8138e262d04f2b10f2ede Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 May 2020 14:24:49 -0700 Subject: [PATCH 1/2] PERF: Datetimelike lookups --- pandas/core/arrays/_mixins.py | 9 ++++++--- pandas/core/arrays/datetimelike.py | 9 +++++++++ pandas/core/arrays/datetimes.py | 3 ++- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/timedeltas.py | 3 ++- 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index d1f8957859337..832d09b062265 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -4,6 +4,7 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly from pandas.core.algorithms import take, unique from pandas.core.arrays.base import ExtensionArray @@ -64,6 +65,8 @@ def _validate_fill_value(self, fill_value): # ------------------------------------------------------------------------ + # TODO: make this a cache_readonly; for that to work we need to remove + # the _index_data kludge in libreduction @property def shape(self) -> Tuple[int, ...]: return self._ndarray.shape @@ -71,15 +74,15 @@ def shape(self) -> Tuple[int, ...]: def __len__(self) -> int: return self.shape[0] - @property + @cache_readonly def ndim(self) -> int: return len(self.shape) - @property + @cache_readonly def size(self) -> int: return np.prod(self.shape) - @property + @cache_readonly def nbytes(self) -> int: return self._ndarray.nbytes diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e07e2da164cac..257a51b423308 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -454,6 +454,8 @@ class DatetimeLikeArrayMixin( # ------------------------------------------------------------------ # NDArrayBackedExtensionArray compat + # TODO: make this a cache_readonly; need to get around _index_data + # kludge in libreduction @property def _ndarray(self) -> np.ndarray: # NB: A bunch of Interval tests fail if we use ._data @@ -526,6 +528,13 @@ def __getitem__(self, key): only handle list-likes, slices, and integer scalars """ + if lib.is_integer(key): + # fast-path + result = self._data[key] + if self.ndim == 1: + return self._box_func(result) + return self._simple_new(result, dtype=self.dtype) + if com.is_bool_indexer(key): # first convert to boolean, because check_array_indexer doesn't # allow object dtype diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e3fbb906ed6b1..5b173ae4230bc 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -20,6 +20,7 @@ ) import pandas._libs.tslibs.frequencies as libfrequencies from pandas.errors import PerformanceWarning +from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( DT64NS_DTYPE, @@ -475,7 +476,7 @@ def _maybe_clear_freq(self): # ----------------------------------------------------------------- # Descriptive Properties - @property + @cache_readonly def _box_func(self): return lambda x: Timestamp(x, freq=self.freq, tz=self.tz) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 1460a2e762771..d79ce5289a849 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -482,7 +482,7 @@ def _time_shift(self, periods, freq=None): values[self._isnan] = iNaT return type(self)(values, freq=self.freq) - @property + @cache_readonly def _box_func(self): return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a460d07e1f6f2..0c3e087cabcf2 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -12,6 +12,7 @@ precision_from_unit, ) from pandas.compat.numpy import function as nv +from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( DT64NS_DTYPE, @@ -113,7 +114,7 @@ class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): # Note: ndim must be defined to ensure NaT.__richcmp(TimedeltaArray) # operates pointwise. - @property + @cache_readonly def _box_func(self): return lambda x: Timedelta(x, unit="ns") From 2b78edf3e289ab5c4fa077ff1f343d4da5ce12d6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 May 2020 16:02:50 -0700 Subject: [PATCH 2/2] revert --- pandas/core/arrays/datetimes.py | 3 +-- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/timedeltas.py | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 5b173ae4230bc..e3fbb906ed6b1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -20,7 +20,6 @@ ) import pandas._libs.tslibs.frequencies as libfrequencies from pandas.errors import PerformanceWarning -from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( DT64NS_DTYPE, @@ -476,7 +475,7 @@ def _maybe_clear_freq(self): # ----------------------------------------------------------------- # Descriptive Properties - @cache_readonly + @property def _box_func(self): return lambda x: Timestamp(x, freq=self.freq, tz=self.tz) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d79ce5289a849..1460a2e762771 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -482,7 +482,7 @@ def _time_shift(self, periods, freq=None): values[self._isnan] = iNaT return type(self)(values, freq=self.freq) - @cache_readonly + @property def _box_func(self): return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 0c3e087cabcf2..a460d07e1f6f2 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -12,7 +12,6 @@ precision_from_unit, ) from pandas.compat.numpy import function as nv -from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( DT64NS_DTYPE, @@ -114,7 +113,7 @@ class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): # Note: ndim must be defined to ensure NaT.__richcmp(TimedeltaArray) # operates pointwise. - @cache_readonly + @property def _box_func(self): return lambda x: Timedelta(x, unit="ns")