diff --git a/pandas/core/base.py b/pandas/core/base.py index 1ba5061cd7e9a..585db0f49d8bf 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -6,7 +6,7 @@ from pandas.core import common as com import pandas.core.nanops as nanops import pandas.tslib as tslib - +from pandas.util.decorators import cache_readonly class StringMixin(object): @@ -392,6 +392,11 @@ def _box_values(self, values): import pandas.lib as lib return lib.map_infer(values, self._box_func) + @cache_readonly + def hasnans(self): + """ return if I have any nans; enables various perf speedups """ + return (self.asi8 == tslib.iNaT).any() + @property def asobject(self): from pandas.core.index import Index @@ -408,11 +413,18 @@ def min(self, axis=None): Overridden ndarray.min to return an object """ try: - mask = self.asi8 == tslib.iNaT - if mask.any(): + i8 = self.asi8 + + # quick check + if len(i8) and self.is_monotonic: + if i8[0] != tslib.iNaT: + return self._box_func(i8[0]) + + if self.hasnans: + mask = i8 == tslib.iNaT min_stamp = self[~mask].asi8.min() else: - min_stamp = self.asi8.min() + min_stamp = i8.min() return self._box_func(min_stamp) except ValueError: return self._na_value @@ -422,11 +434,18 @@ def max(self, axis=None): Overridden ndarray.max to return an object """ try: - mask = self.asi8 == tslib.iNaT - if mask.any(): + i8 = self.asi8 + + # quick check + if len(i8) and self.is_monotonic: + if i8[-1] != tslib.iNaT: + return self._box_func(i8[-1]) + + if self.hasnans: + mask = i8 == tslib.iNaT max_stamp = self[~mask].asi8.max() else: - max_stamp = self.asi8.max() + max_stamp = i8.max() return self._box_func(max_stamp) except ValueError: return self._na_value diff --git a/pandas/core/index.py b/pandas/core/index.py index 51ddacd00af08..262305a335d46 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2072,7 +2072,7 @@ def __contains__(self, other): try: # if other is a sequence this throws a ValueError - return np.isnan(other) and self._hasnans + return np.isnan(other) and self.hasnans except ValueError: try: return len(other) <= 1 and _try_get_item(other) in self @@ -2109,7 +2109,7 @@ def _isnan(self): return np.isnan(self.values) @cache_readonly - def _hasnans(self): + def hasnans(self): return self._isnan.any() @cache_readonly diff --git a/pandas/lib.pyx b/pandas/lib.pyx index a064e714e7f89..7690cc4819dd5 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -958,7 +958,7 @@ def is_lexsorted(list list_of_arrays): @cython.boundscheck(False) @cython.wraparound(False) def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, - object closed='left'): + object closed='left', bint hasnans=0): """ Int64 (datetime64) version of generic python version in groupby.py """ @@ -968,9 +968,9 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, int64_t l_bin, r_bin, nat_count bint right_closed = closed == 'right' - mask = values == iNaT nat_count = 0 - if mask.any(): + if hasnans: + mask = values == iNaT nat_count = np.sum(mask) values = values[~mask] diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 1ee7664f7bb9a..01aff164d8384 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -174,7 +174,7 @@ def _get_time_bins(self, ax): binner, bin_edges = self._adjust_bin_edges(binner, ax_values) # general version, knowing nothing about relative frequencies - bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed) + bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed, hasnans=ax.hasnans) if self.closed == 'right': labels = binner @@ -188,7 +188,7 @@ def _get_time_bins(self, ax): elif not trimmed: labels = labels[:-1] - if (ax_values == tslib.iNaT).any(): + if ax.hasnans: binner = binner.insert(0, tslib.NaT) labels = labels.insert(0, tslib.NaT)