Skip to content

Commit f6ba5c4

Browse files
committed
Merge pull request #7673 from sinhrks/minmax_perf
PERF: improve resample perf
2 parents be7fc39 + 75b9345 commit f6ba5c4

File tree

6 files changed: 78 additions and 51 deletions (+78 / -51 lines changed)

pandas/core/base.py

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
from pandas import compat
55
import numpy as np
66
from pandas.core import common as com
7+
import pandas.core.nanops as nanops
8+
import pandas.tslib as tslib
9+
710

811
class StringMixin(object):
912

@@ -236,13 +239,11 @@ def _wrap_access_object(self, obj):
236239

237240
def max(self):
238241
""" The maximum value of the object """
239-
import pandas.core.nanops
240-
return pandas.core.nanops.nanmax(self.values)
242+
return nanops.nanmax(self.values)
241243

242244
def min(self):
243245
""" The minimum value of the object """
244-
import pandas.core.nanops
245-
return pandas.core.nanops.nanmin(self.values)
246+
return nanops.nanmin(self.values)
246247

247248
def value_counts(self, normalize=False, sort=True, ascending=False,
248249
bins=None, dropna=True):
@@ -406,31 +407,29 @@ def min(self, axis=None):
406407
"""
407408
Overridden ndarray.min to return an object
408409
"""
409-
import pandas.tslib as tslib
410-
mask = self.asi8 == tslib.iNaT
411-
masked = self[~mask]
412-
if len(masked) == 0:
413-
return self._na_value
414-
elif self.is_monotonic:
415-
return masked[0]
416-
else:
417-
min_stamp = masked.asi8.min()
410+
try:
411+
mask = self.asi8 == tslib.iNaT
412+
if mask.any():
413+
min_stamp = self[~mask].asi8.min()
414+
else:
415+
min_stamp = self.asi8.min()
418416
return self._box_func(min_stamp)
417+
except ValueError:
418+
return self._na_value
419419

420420
def max(self, axis=None):
421421
"""
422422
Overridden ndarray.max to return an object
423423
"""
424-
import pandas.tslib as tslib
425-
mask = self.asi8 == tslib.iNaT
426-
masked = self[~mask]
427-
if len(masked) == 0:
428-
return self._na_value
429-
elif self.is_monotonic:
430-
return masked[-1]
431-
else:
432-
max_stamp = masked.asi8.max()
424+
try:
425+
mask = self.asi8 == tslib.iNaT
426+
if mask.any():
427+
max_stamp = self[~mask].asi8.max()
428+
else:
429+
max_stamp = self.asi8.max()
433430
return self._box_func(max_stamp)
431+
except ValueError:
432+
return self._na_value
434433

435434
@property
436435
def _formatter_func(self):

pandas/lib.pyx

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -965,12 +965,14 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
965965
cdef:
966966
Py_ssize_t lenidx, lenbin, i, j, bc, vc
967967
ndarray[int64_t] bins
968-
int64_t l_bin, r_bin
968+
int64_t l_bin, r_bin, nat_count
969969
bint right_closed = closed == 'right'
970970

971971
mask = values == iNaT
972-
nat_count = values[mask].size
973-
values = values[~mask]
972+
nat_count = 0
973+
if mask.any():
974+
nat_count = np.sum(mask)
975+
values = values[~mask]
974976

975977
lenidx = len(values)
976978
lenbin = len(binner)
@@ -991,17 +993,22 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
991993
bc = 0 # bin count
992994

993995
# linear scan
994-
for i in range(0, lenbin - 1):
995-
l_bin = binner[i]
996-
r_bin = binner[i+1]
997-
998-
# count values in current bin, advance to next bin
999-
while j < lenidx and (values[j] < r_bin or
1000-
(right_closed and values[j] == r_bin)):
1001-
j += 1
1002-
1003-
bins[bc] = j
1004-
bc += 1
996+
if right_closed:
997+
for i in range(0, lenbin - 1):
998+
r_bin = binner[i+1]
999+
# count values in current bin, advance to next bin
1000+
while j < lenidx and values[j] <= r_bin:
1001+
j += 1
1002+
bins[bc] = j
1003+
bc += 1
1004+
else:
1005+
for i in range(0, lenbin - 1):
1006+
r_bin = binner[i+1]
1007+
# count values in current bin, advance to next bin
1008+
while j < lenidx and values[j] < r_bin:
1009+
j += 1
1010+
bins[bc] = j
1011+
bc += 1
10051012

10061013
if nat_count > 0:
10071014
# shift bins by the number of NaT

pandas/src/generate_code.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1584,7 +1584,7 @@ def group_mean_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
15841584
for i in range(ngroups):
15851585
for j in range(K):
15861586
count = nobs[i, j]
1587-
if nobs[i, j] == 0:
1587+
if count == 0:
15881588
out[i, j] = nan
15891589
else:
15901590
out[i, j] = sumx[i, j] / count

pandas/tseries/offsets.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
# convert to/from datetime/timestamp to allow out-of-bounds Timestamp values to pass through
2828
def as_timestamp(obj):
2929
try:
30+
if isinstance(obj, Timestamp):
31+
return obj
3032
return Timestamp(obj)
3133
except (OutOfBoundsDatetime):
3234
pass
@@ -2014,9 +2016,21 @@ def delta(self):
20142016
def nanos(self):
20152017
return _delta_to_nanoseconds(self.delta)
20162018

2017-
@apply_wraps
20182019
def apply(self, other):
2019-
if isinstance(other, (datetime, timedelta)):
2020+
# Timestamp can handle tz and nano sec, thus no need to use apply_wraps
2021+
if type(other) == date:
2022+
other = datetime(other.year, other.month, other.day)
2023+
elif isinstance(other, (np.datetime64, datetime)):
2024+
other = as_timestamp(other)
2025+
2026+
if isinstance(other, datetime):
2027+
result = other + self.delta
2028+
if self.normalize:
2029+
# normalize_date returns normal datetime
2030+
result = tslib.normalize_date(result)
2031+
return as_timestamp(result)
2032+
2033+
elif isinstance(other, timedelta):
20202034
return other + self.delta
20212035
elif isinstance(other, type(self)):
20222036
return type(self)(self.n + other.n)

pandas/tseries/resample.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ def _get_time_bins(self, ax):
152152
binner = labels = DatetimeIndex(data=[], freq=self.freq, name=ax.name)
153153
return binner, [], labels
154154

155-
first, last = _get_range_edges(ax, self.freq, closed=self.closed,
155+
first, last = ax.min(), ax.max()
156+
first, last = _get_range_edges(first, last, self.freq, closed=self.closed,
156157
base=self.base)
157158
tz = ax.tz
158159
binner = labels = DatetimeIndex(freq=self.freq,
@@ -163,7 +164,7 @@ def _get_time_bins(self, ax):
163164

164165
# a little hack
165166
trimmed = False
166-
if (len(binner) > 2 and binner[-2] == ax.max() and
167+
if (len(binner) > 2 and binner[-2] == last and
167168
self.closed == 'right'):
168169

169170
binner = binner[:-1]
@@ -353,11 +354,10 @@ def _take_new_index(obj, indexer, new_index, axis=0):
353354
raise NotImplementedError
354355

355356

356-
def _get_range_edges(axis, offset, closed='left', base=0):
357+
def _get_range_edges(first, last, offset, closed='left', base=0):
357358
if isinstance(offset, compat.string_types):
358359
offset = to_offset(offset)
359360

360-
first, last = axis.min(), axis.max()
361361
if isinstance(offset, Tick):
362362
day_nanos = _delta_to_nanoseconds(timedelta(1))
363363
# #1165

pandas/tslib.pyx

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3134,14 +3134,21 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
31343134
else:
31353135
relation = START
31363136

3137-
for i in range(n):
3138-
if arr[i] == iNaT:
3139-
result[i] = iNaT
3140-
continue
3141-
val = func(arr[i], relation, &finfo)
3142-
if val == INT32_MIN:
3143-
raise ValueError("Unable to convert to desired frequency.")
3144-
result[i] = val
3137+
mask = arr == iNaT
3138+
if mask.any(): # NaT process
3139+
for i in range(n):
3140+
val = arr[i]
3141+
if val != iNaT:
3142+
val = func(val, relation, &finfo)
3143+
if val == INT32_MIN:
3144+
raise ValueError("Unable to convert to desired frequency.")
3145+
result[i] = val
3146+
else:
3147+
for i in range(n):
3148+
val = func(arr[i], relation, &finfo)
3149+
if val == INT32_MIN:
3150+
raise ValueError("Unable to convert to desired frequency.")
3151+
result[i] = val
31453152

31463153
return result
31473154

0 commit comments

Comments
 (0)