diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 534117b8e9249..caa5d83da6b87 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1793,17 +1793,25 @@ def indices(self): def group_info(self): ngroups = self.ngroups obs_group_ids = np.arange(ngroups) - comp_ids = np.repeat(np.arange(ngroups), np.diff(np.r_[0, self.bins])) + rep = np.diff(np.r_[0, self.bins]) + + if ngroups == len(self.bins): + comp_ids = np.repeat(np.arange(ngroups), rep) + else: + comp_ids = np.repeat(np.r_[-1, np.arange(ngroups)], rep) + return comp_ids, obs_group_ids, ngroups @cache_readonly def ngroups(self): - return len(self.binlabels) + return len(self.result_index) @cache_readonly def result_index(self): - mask = self.binlabels.asi8 == tslib.iNaT - return self.binlabels[~mask] + if len(self.binlabels) != 0 and isnull(self.binlabels[0]): + return self.binlabels[1:] + + return self.binlabels @property def levels(self): @@ -1839,40 +1847,14 @@ def size(self): #---------------------------------------------------------------------- # cython aggregation - _cython_functions = { - 'add': 'group_add_bin', - 'prod': 'group_prod_bin', - 'mean': 'group_mean_bin', - 'min': 'group_min_bin', - 'max': 'group_max_bin', - 'var': 'group_var_bin', - 'ohlc': 'group_ohlc', - 'first': { - 'name': 'group_nth_bin', - 'f': lambda func, a, b, c, d: func(a, b, c, d, 1) - }, - 'last': 'group_last_bin', - 'count': 'group_count_bin', - } + _cython_functions = {'ohlc': 'group_ohlc'} + _cython_functions.update(BaseGrouper._cython_functions) + _cython_functions.pop('median') _name_functions = { 'ohlc': lambda *args: ['open', 'high', 'low', 'close'] } - def _aggregate(self, result, counts, values, agg_func, is_numeric=True): - - if values.ndim > 3: - # punting for now - raise NotImplementedError("number of dimensions is currently " - "limited to 3") - elif values.ndim > 2: - for i, chunk in enumerate(values.transpose(2, 0, 1)): - agg_func(result[:, :, i], counts, chunk, self.bins) - else: - agg_func(result, counts, values, self.bins) - - return result - def agg_series(self, obj, func): dummy = obj[:0] grouper = lib.SeriesBinGrouper(obj, func, self.bins, dummy) diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 29a991a9acfd3..c086919d94644 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -751,105 +751,6 @@ def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = resx[i, j] """ -group_last_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_last_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(c_type)s, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - %(dest_type2)s val, count - ndarray[%(dest_type2)s, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = %(nan_val)s - else: - out[i, j] = resx[i, j] -""" - -group_nth_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_nth_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(c_type)s, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - %(dest_type2)s val, count - ndarray[%(dest_type2)s, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bin) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = %(nan_val)s - else: - out[i, j] = resx[i, j] -""" - group_nth_template = """@cython.wraparound(False) @cython.boundscheck(False) def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, @@ -961,69 +862,6 @@ def group_add_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = sumx[i, j] """ -group_add_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_add_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(dest_type2)s, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b, nbins - %(dest_type2)s val, count - ndarray[%(dest_type2)s, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape - - with nogil: - - b = 0 - if K > 1: - - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = sumx[i, j] -""" - group_prod_template = """@cython.wraparound(False) @cython.boundscheck(False) def group_prod_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, @@ -1083,68 +921,6 @@ def group_prod_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = prodx[i, j] """ -group_prod_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_prod_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(dest_type2)s, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - %(dest_type2)s val, count - ndarray[%(dest_type2)s, ndim=2] prodx, nobs - - nobs = np.zeros_like(out) - prodx = np.ones_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape - - with nogil: - - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - prodx[b, j] *= val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - prodx[b, 0] *= val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = prodx[i, j] -""" - group_var_template = """@cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision(True) @@ -1195,72 +971,6 @@ def group_var_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, """ -group_var_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_var_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(dest_type2)s, ndim=2] values, - ndarray[int64_t] bins): - - cdef: - Py_ssize_t i, j, N, K, ngroups, b - %(dest_type2)s val, ct - ndarray[%(dest_type2)s, ndim=2] nobs, sumx, sumxx - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - sumxx = np.zeros_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - sumxx[b, j] += val * val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - sumxx[b, 0] += val * val - - for i in range(ngroups): - for j in range(K): - ct = nobs[i, j] - if ct < 2: - out[i, j] = NAN - else: - out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / - (ct * ct - ct)) -""" - group_count_template = """@cython.boundscheck(False) @cython.wraparound(False) def group_count_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, @@ -1299,115 +1009,12 @@ def group_count_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, %(tab)s out[i, j] = nobs[i, j] """ -group_count_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_count_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(c_type)s, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, ngroups - Py_ssize_t N = values.shape[0], K = values.shape[1], b = 0 - %(c_type)s val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(bins) == 0: - return - ngroups = len(bins) + (bins[len(bins) - 1] != N) - - %(nogil)s - %(tab)sfor i in range(N): - %(tab)s while b < ngroups - 1 and i >= bins[b]: - %(tab)s b += 1 - - %(tab)s counts[b] += 1 - %(tab)s for j in range(K): - %(tab)s val = values[i, j] - - %(tab)s # not nan - %(tab)s nobs[b, j] += val == val and val != iNaT - - %(tab)sfor i in range(ngroups): - %(tab)s for j in range(K): - %(tab)s out[i, j] = nobs[i, j] -""" - # add passing bin edges, instead of labels #---------------------------------------------------------------------- # group_min, group_max -group_min_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_min_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(dest_type2)s, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - %(dest_type2)s val, count - ndarray[%(dest_type2)s, ndim=2] minx, nobs - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(%(inf_val)s) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if val < minx[b, j]: - minx[b, j] = val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - if val < minx[b, 0]: - minx[b, 0] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = %(nan_val)s - else: - out[i, j] = minx[i, j] -""" - group_max_template = """@cython.wraparound(False) @cython.boundscheck(False) def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, @@ -1471,72 +1078,6 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = maxx[i, j] """ -group_max_bin_template = """@cython.wraparound(False) -@cython.boundscheck(False) -def group_max_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(dest_type2)s, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - %(dest_type2)s val, count - ndarray[%(dest_type2)s, ndim=2] maxx, nobs - - nobs = np.zeros_like(out) - maxx = np.empty_like(out) - maxx.fill(-%(inf_val)s) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if val > maxx[b, j]: - maxx[b, j] = val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - if val > maxx[b, 0]: - maxx[b, 0] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = %(nan_val)s - else: - out[i, j] = maxx[i, j] -""" - - group_min_template = """@cython.wraparound(False) @cython.boundscheck(False) def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, @@ -1656,141 +1197,50 @@ def group_mean_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, out[i, j] = sumx[i, j] / count """ -group_mean_bin_template = """ -@cython.boundscheck(False) -def group_mean_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(dest_type2)s, ndim=2] values, - ndarray[int64_t] bins): - cdef: - Py_ssize_t i, j, N, K, ngroups, b - %(dest_type2)s val, count - ndarray[%(dest_type2)s, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - N, K = ( values).shape - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - - for i in range(ngroups): - for j in range(K): - count = nobs[i, j] - if count == 0: - out[i, j] = NAN - else: - out[i, j] = sumx[i, j] / count -""" - group_ohlc_template = """@cython.wraparound(False) @cython.boundscheck(False) def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, ndarray[int64_t] counts, ndarray[%(dest_type2)s, ndim=2] values, - ndarray[int64_t] bins): + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b + Py_ssize_t i, j, N, K, lab %(dest_type2)s val, count - %(dest_type2)s vopen, vhigh, vlow, vclose - bint got_first = 0 + Py_ssize_t ngroups = len(counts) - if len(bins) == 0: + if len(labels) == 0: return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 N, K = ( values).shape if out.shape[1] != 4: raise ValueError('Output array must have 4 columns') - b = 0 if K > 1: raise NotImplementedError("Argument 'values' must have only " "one dimension") - else: + out.fill(np.nan) - with nogil: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - if not got_first: - out[b, 0] = NAN - out[b, 1] = NAN - out[b, 2] = NAN - out[b, 3] = NAN - else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose - b += 1 - got_first = 0 - - counts[b] += 1 - val = values[i, 0] + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: + continue - # not nan - if val == val: - if not got_first: - got_first = 1 - vopen = val - vlow = val - vhigh = val - else: - if val < vlow: - vlow = val - if val > vhigh: - vhigh = val - vclose = val - - if not got_first: - out[b, 0] = NAN - out[b, 1] = NAN - out[b, 2] = NAN - out[b, 3] = NAN + counts[lab] += 1 + val = values[i, 0] + if val != val: + continue + + if out[lab, 0] != out[lab, 0]: + out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose + out[lab, 1] = max(out[lab, 1], val) + out[lab, 2] = min(out[lab, 2], val) + out[lab, 3] = val """ arrmap_template = """@cython.wraparound(False) @@ -2534,26 +1984,18 @@ def generate_from_template(template, exclude=None): put_2d = [diff_2d_template] groupbys = [group_add_template, - group_add_bin_template, group_prod_template, - group_prod_bin_template, group_var_template, - group_var_bin_template, group_mean_template, - group_mean_bin_template, group_ohlc_template] groupby_selection = [group_last_template, - group_last_bin_template, - group_nth_template, - group_nth_bin_template] + group_nth_template] groupby_min_max = [group_min_template, - group_min_bin_template, - group_max_template, - group_max_bin_template] + group_max_template] -groupby_count = [group_count_template, group_count_bin_template] +groupby_count = [group_count_template] templates_1d = [map_indices_template, pad_template, diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index d4cf7824c8911..c0ecd04749e58 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -6865,131 +6865,6 @@ def group_add_float32(ndarray[float32_t, ndim=2] out, out[i, j] = sumx[i, j] -@cython.wraparound(False) -@cython.boundscheck(False) -def group_add_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b, nbins - float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape - - with nogil: - - b = 0 - if K > 1: - - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = sumx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_add_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b, nbins - float32_t val, count - ndarray[float32_t, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape - - with nogil: - - b = 0 - if K > 1: - - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = sumx[i, j] - - @cython.wraparound(False) @cython.boundscheck(False) def group_prod_float64(ndarray[float64_t, ndim=2] out, @@ -7107,129 +6982,6 @@ def group_prod_float32(ndarray[float32_t, ndim=2] out, out[i, j] = prodx[i, j] -@cython.wraparound(False) -@cython.boundscheck(False) -def group_prod_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] prodx, nobs - - nobs = np.zeros_like(out) - prodx = np.ones_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape - - with nogil: - - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - prodx[b, j] *= val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - prodx[b, 0] *= val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = prodx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_prod_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] prodx, nobs - - nobs = np.zeros_like(out) - prodx = np.ones_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape - - with nogil: - - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - prodx[b, j] *= val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - prodx[b, 0] *= val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = prodx[i, j] - - @cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision(True) @@ -7329,137 +7081,6 @@ def group_var_float32(ndarray[float32_t, ndim=2] out, -@cython.wraparound(False) -@cython.boundscheck(False) -def group_var_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, ct - ndarray[float64_t, ndim=2] nobs, sumx, sumxx - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - sumxx = np.zeros_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - sumxx[b, j] += val * val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - sumxx[b, 0] += val * val - - for i in range(ngroups): - for j in range(K): - ct = nobs[i, j] - if ct < 2: - out[i, j] = NAN - else: - out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / - (ct * ct - ct)) - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_var_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, ct - ndarray[float32_t, ndim=2] nobs, sumx, sumxx - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - sumxx = np.zeros_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - sumxx[b, j] += val * val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - sumxx[b, 0] += val * val - - for i in range(ngroups): - for j in range(K): - ct = nobs[i, j] - if ct < 2: - out[i, j] = NAN - else: - out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) / - (ct * ct - ct)) - - @cython.wraparound(False) @cython.boundscheck(False) def group_mean_float64(ndarray[float64_t, ndim=2] out, @@ -7569,276 +7190,95 @@ def group_mean_float32(ndarray[float32_t, ndim=2] out, out[i, j] = sumx[i, j] / count - -@cython.boundscheck(False) -def group_mean_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - N, K = ( values).shape - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - - for i in range(ngroups): - for j in range(K): - count = nobs[i, j] - if count == 0: - out[i, j] = NAN - else: - out[i, j] = sumx[i, j] / count - - -@cython.boundscheck(False) -def group_mean_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - N, K = ( values).shape - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - - for i in range(ngroups): - for j in range(K): - count = nobs[i, j] - if count == 0: - out[i, j] = NAN - else: - out[i, j] = sumx[i, j] / count - - -@cython.wraparound(False) +@cython.wraparound(False) @cython.boundscheck(False) def group_ohlc_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b + Py_ssize_t i, j, N, K, lab float64_t val, count - float64_t vopen, vhigh, vlow, vclose - bint got_first = 0 + Py_ssize_t ngroups = len(counts) - if len(bins) == 0: + if len(labels) == 0: return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 N, K = ( values).shape if out.shape[1] != 4: raise ValueError('Output array must have 4 columns') - b = 0 if K > 1: raise NotImplementedError("Argument 'values' must have only " "one dimension") - else: + out.fill(np.nan) - with nogil: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - if not got_first: - out[b, 0] = NAN - out[b, 1] = NAN - out[b, 2] = NAN - out[b, 3] = NAN - else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose - b += 1 - got_first = 0 - - counts[b] += 1 - val = values[i, 0] + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: + continue - # not nan - if val == val: - if not got_first: - got_first = 1 - vopen = val - vlow = val - vhigh = val - else: - if val < vlow: - vlow = val - if val > vhigh: - vhigh = val - vclose = val - - if not got_first: - out[b, 0] = NAN - out[b, 1] = NAN - out[b, 2] = NAN - out[b, 3] = NAN + counts[lab] += 1 + val = values[i, 0] + if val != val: + continue + + if out[lab, 0] != out[lab, 0]: + out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose + out[lab, 1] = max(out[lab, 1], val) + out[lab, 2] = min(out[lab, 2], val) + out[lab, 3] = val @cython.wraparound(False) @cython.boundscheck(False) def group_ohlc_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b + Py_ssize_t i, j, N, K, lab float32_t val, count - float32_t vopen, vhigh, vlow, vclose - bint got_first = 0 + Py_ssize_t ngroups = len(counts) - if len(bins) == 0: + if len(labels) == 0: return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 N, K = ( values).shape if out.shape[1] != 4: raise ValueError('Output array must have 4 columns') - b = 0 if K > 1: raise NotImplementedError("Argument 'values' must have only " "one dimension") - else: + out.fill(np.nan) - with nogil: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - if not got_first: - out[b, 0] = NAN - out[b, 1] = NAN - out[b, 2] = NAN - out[b, 3] = NAN - else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose - b += 1 - got_first = 0 - - counts[b] += 1 - val = values[i, 0] + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: + continue - # not nan - if val == val: - if not got_first: - got_first = 1 - vopen = val - vlow = val - vhigh = val - else: - if val < vlow: - vlow = val - if val > vhigh: - vhigh = val - vclose = val - - if not got_first: - out[b, 0] = NAN - out[b, 1] = NAN - out[b, 2] = NAN - out[b, 3] = NAN + counts[lab] += 1 + val = values[i, 0] + if val != val: + continue + + if out[lab, 0] != out[lab, 0]: + out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val else: - out[b, 0] = vopen - out[b, 1] = vhigh - out[b, 2] = vlow - out[b, 3] = vclose + out[lab, 1] = max(out[lab, 1], val) + out[lab, 2] = min(out[lab, 2], val) + out[lab, 3] = val @cython.wraparound(False) @@ -7977,151 +7417,6 @@ def group_last_int64(ndarray[int64_t, ndim=2] out, out[i, j] = resx[i, j] -@cython.wraparound(False) -@cython.boundscheck(False) -def group_last_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_last_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_last_bin_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - int64_t val, count - ndarray[int64_t, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = iNaT - else: - out[i, j] = resx[i, j] - - @cython.wraparound(False) @cython.boundscheck(False) def group_nth_float64(ndarray[float64_t, ndim=2] out, @@ -8263,538 +7558,7 @@ def group_nth_int64(ndarray[int64_t, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_nth_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bin) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_nth_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bin) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_nth_bin_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - int64_t val, count - ndarray[int64_t, ndim=2] resx, nobs - - nobs = np.zeros_like(out) - resx = np.empty_like(out) - - if len(bin) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = iNaT - else: - out[i, j] = resx[i, j] - - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_min_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - float64_t val, count - ndarray[float64_t, ndim=2] minx, nobs - - if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) - - N, K = ( values).shape - - with nogil: - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[lab, 0] += 1 - if val < minx[lab, 0]: - minx[lab, 0] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = minx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_min_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - float32_t val, count - ndarray[float32_t, ndim=2] minx, nobs - - if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) - - N, K = ( values).shape - - with nogil: - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[lab, 0] += 1 - if val < minx[lab, 0]: - minx[lab, 0] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = minx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_min_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - int64_t val, count - ndarray[int64_t, ndim=2] minx, nobs - - if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(9223372036854775807) - - N, K = ( values).shape - - with nogil: - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[lab, 0] += 1 - if val < minx[lab, 0]: - minx[lab, 0] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = iNaT - else: - out[i, j] = minx[i, j] - - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_min_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] minx, nobs - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if val < minx[b, j]: - minx[b, j] = val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - if val < minx[b, 0]: - minx[b, 0] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = minx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_min_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float32_t val, count - ndarray[float32_t, ndim=2] minx, nobs - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(np.inf) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if val < minx[b, j]: - minx[b, j] = val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - if val < minx[b, 0]: - minx[b, 0] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = minx[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_min_bin_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - int64_t val, count - ndarray[int64_t, ndim=2] minx, nobs - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - minx.fill(9223372036854775807) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - with nogil: - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if val < minx[b, j]: - minx[b, j] = val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - if val < minx[b, 0]: - minx[b, 0] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = iNaT - else: - out[i, j] = minx[i, j] - - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_max_float64(ndarray[float64_t, ndim=2] out, +def group_min_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, ndarray[int64_t] labels): @@ -8804,15 +7568,15 @@ def group_max_float64(ndarray[float64_t, ndim=2] out, cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count - ndarray[float64_t, ndim=2] maxx, nobs + ndarray[float64_t, ndim=2] minx, nobs if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") nobs = np.zeros_like(out) - maxx = np.empty_like(out) - maxx.fill(-np.inf) + minx = np.empty_like(out) + minx.fill(np.inf) N, K = ( values).shape @@ -8830,8 +7594,8 @@ def group_max_float64(ndarray[float64_t, ndim=2] out, # not nan if val == val: nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val + if val < minx[lab, j]: + minx[lab, j] = val else: for i in range(N): lab = labels[i] @@ -8844,19 +7608,19 @@ def group_max_float64(ndarray[float64_t, ndim=2] out, # not nan if val == val: nobs[lab, 0] += 1 - if val > maxx[lab, 0]: - maxx[lab, 0] = val + if val < minx[lab, 0]: + minx[lab, 0] = val for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: out[i, j] = NAN else: - out[i, j] = maxx[i, j] + out[i, j] = minx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def group_max_float32(ndarray[float32_t, ndim=2] out, +def group_min_float32(ndarray[float32_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float32_t, ndim=2] values, ndarray[int64_t] labels): @@ -8866,15 +7630,15 @@ def group_max_float32(ndarray[float32_t, ndim=2] out, cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count - ndarray[float32_t, ndim=2] maxx, nobs + ndarray[float32_t, ndim=2] minx, nobs if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") nobs = np.zeros_like(out) - maxx = np.empty_like(out) - maxx.fill(-np.inf) + minx = np.empty_like(out) + minx.fill(np.inf) N, K = ( values).shape @@ -8892,8 +7656,8 @@ def group_max_float32(ndarray[float32_t, ndim=2] out, # not nan if val == val: nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val + if val < minx[lab, j]: + minx[lab, j] = val else: for i in range(N): lab = labels[i] @@ -8906,19 +7670,19 @@ def group_max_float32(ndarray[float32_t, ndim=2] out, # not nan if val == val: nobs[lab, 0] += 1 - if val > maxx[lab, 0]: - maxx[lab, 0] = val + if val < minx[lab, 0]: + minx[lab, 0] = val for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: out[i, j] = NAN else: - out[i, j] = maxx[i, j] + out[i, j] = minx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def group_max_int64(ndarray[int64_t, ndim=2] out, +def group_min_int64(ndarray[int64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[int64_t, ndim=2] values, ndarray[int64_t] labels): @@ -8928,15 +7692,15 @@ def group_max_int64(ndarray[int64_t, ndim=2] out, cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) int64_t val, count - ndarray[int64_t, ndim=2] maxx, nobs + ndarray[int64_t, ndim=2] minx, nobs if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") nobs = np.zeros_like(out) - maxx = np.empty_like(out) - maxx.fill(-9223372036854775807) + minx = np.empty_like(out) + minx.fill(9223372036854775807) N, K = ( values).shape @@ -8954,8 +7718,8 @@ def group_max_int64(ndarray[int64_t, ndim=2] out, # not nan if val == val: nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val + if val < minx[lab, j]: + minx[lab, j] = val else: for i in range(N): lab = labels[i] @@ -8968,75 +7732,73 @@ def group_max_int64(ndarray[int64_t, ndim=2] out, # not nan if val == val: nobs[lab, 0] += 1 - if val > maxx[lab, 0]: - maxx[lab, 0] = val + if val < minx[lab, 0]: + minx[lab, 0] = val for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: out[i, j] = iNaT else: - out[i, j] = maxx[i, j] + out[i, j] = minx[i, j] @cython.wraparound(False) @cython.boundscheck(False) -def group_max_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): +def group_max_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float64_t val, count ndarray[float64_t, ndim=2] maxx, nobs + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + nobs = np.zeros_like(out) + maxx = np.empty_like(out) maxx.fill(-np.inf) - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape with nogil: - b = 0 if K > 1: for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + lab = labels[i] + if lab < 0: + continue - counts[b] += 1 + counts[lab] += 1 for j in range(K): val = values[i, j] # not nan if val == val: - nobs[b, j] += 1 - if val > maxx[b, j]: - maxx[b, j] = val + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val else: for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + lab = labels[i] + if lab < 0: + continue - counts[b] += 1 + counts[lab] += 1 val = values[i, 0] # not nan if val == val: - nobs[b, 0] += 1 - if val > maxx[b, 0]: - maxx[b, 0] = val + nobs[lab, 0] += 1 + if val > maxx[lab, 0]: + maxx[lab, 0] = val - for i in range(ngroups): + for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: out[i, j] = NAN @@ -9045,62 +7807,60 @@ def group_max_bin_float64(ndarray[float64_t, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_max_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): +def group_max_float32(ndarray[float32_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float32_t, ndim=2] values, + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) float32_t val, count ndarray[float32_t, ndim=2] maxx, nobs + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + nobs = np.zeros_like(out) + maxx = np.empty_like(out) maxx.fill(-np.inf) - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape with nogil: - b = 0 if K > 1: for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + lab = labels[i] + if lab < 0: + continue - counts[b] += 1 + counts[lab] += 1 for j in range(K): val = values[i, j] # not nan if val == val: - nobs[b, j] += 1 - if val > maxx[b, j]: - maxx[b, j] = val + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val else: for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + lab = labels[i] + if lab < 0: + continue - counts[b] += 1 + counts[lab] += 1 val = values[i, 0] # not nan if val == val: - nobs[b, 0] += 1 - if val > maxx[b, 0]: - maxx[b, 0] = val + nobs[lab, 0] += 1 + if val > maxx[lab, 0]: + maxx[lab, 0] = val - for i in range(ngroups): + for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: out[i, j] = NAN @@ -9109,62 +7869,60 @@ def group_max_bin_float32(ndarray[float32_t, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_max_bin_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] bins): +def group_max_int64(ndarray[int64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[int64_t, ndim=2] values, + ndarray[int64_t] labels): ''' Only aggregates on axis=0 ''' cdef: - Py_ssize_t i, j, N, K, ngroups, b + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) int64_t val, count ndarray[int64_t, ndim=2] maxx, nobs + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + nobs = np.zeros_like(out) + maxx = np.empty_like(out) maxx.fill(-9223372036854775807) - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - N, K = ( values).shape with nogil: - b = 0 if K > 1: for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + lab = labels[i] + if lab < 0: + continue - counts[b] += 1 + counts[lab] += 1 for j in range(K): val = values[i, j] # not nan if val == val: - nobs[b, j] += 1 - if val > maxx[b, j]: - maxx[b, j] = val + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val else: for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 + lab = labels[i] + if lab < 0: + continue - counts[b] += 1 + counts[lab] += 1 val = values[i, 0] # not nan if val == val: - nobs[b, 0] += 1 - if val > maxx[b, 0]: - maxx[b, 0] = val + nobs[lab, 0] += 1 + if val > maxx[lab, 0]: + maxx[lab, 0] = val - for i in range(ngroups): + for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: out[i, j] = iNaT @@ -9358,187 +8116,6 @@ def group_count_int64(ndarray[int64_t, ndim=2] out, out[i, j] = nobs[i, j] -@cython.wraparound(False) -@cython.boundscheck(False) -def group_count_bin_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, ngroups - Py_ssize_t N = values.shape[0], K = values.shape[1], b = 0 - float64_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(bins) == 0: - return - ngroups = len(bins) + (bins[len(bins) - 1] != N) - - with nogil: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[b, j] += val == val and val != iNaT - - for i in range(ngroups): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_count_bin_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, ngroups - Py_ssize_t N = values.shape[0], K = values.shape[1], b = 0 - float32_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(bins) == 0: - return - ngroups = len(bins) + (bins[len(bins) - 1] != N) - - with nogil: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[b, j] += val == val and val != iNaT - - for i in range(ngroups): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_count_bin_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, ngroups - Py_ssize_t N = values.shape[0], K = values.shape[1], b = 0 - int64_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(bins) == 0: - return - ngroups = len(bins) + (bins[len(bins) - 1] != N) - - with nogil: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[b, j] += val == val and val != iNaT - - for i in range(ngroups): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_count_bin_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, ngroups - Py_ssize_t N = values.shape[0], K = values.shape[1], b = 0 - object val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(bins) == 0: - return - ngroups = len(bins) + (bins[len(bins) - 1] != N) - - - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[b, j] += val == val and val != iNaT - - for i in range(ngroups): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_count_bin_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, ngroups - Py_ssize_t N = values.shape[0], K = values.shape[1], b = 0 - int64_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(bins) == 0: - return - ngroups = len(bins) + (bins[len(bins) - 1] != N) - - with nogil: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[b, j] += val == val and val != iNaT - - for i in range(ngroups): - for j in range(K): - out[i, j] = nobs[i, j] - - @cython.wraparound(False) @cython.boundscheck(False) def left_join_indexer_unique_float64(ndarray[float64_t] left, diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 6dd43539eeabf..566fd54f3b024 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -474,78 +474,19 @@ def test_generate_bins(self): self.assertRaises(ValueError, generate_bins_generic, values, [-3, -1], 'right') - def test_group_bin_functions(self): - - dtypes = ['float32','float64'] - funcs = ['add', 'mean', 'prod', 'min', 'max', 'var'] - - np_funcs = { - 'add': np.sum, - 'mean': np.mean, - 'prod': np.prod, - 'min': np.min, - 'max': np.max, - 'var': lambda x: x.var(ddof=1) if len(x) >= 2 else np.nan - } - - for fname in funcs: - for d in dtypes: - check_less_precise = False - if d == 'float32': - check_less_precise = True - args = [getattr(algos, 'group_%s_%s' % (fname,d)), - getattr(algos, 'group_%s_bin_%s' % (fname,d)), - np_funcs[fname], - d, - check_less_precise] - self._check_versions(*args) - - def _check_versions(self, irr_func, bin_func, np_func, dtype, check_less_precise): - obj = self.obj.astype(dtype) - - cts = np.zeros(3, dtype=np.int64) - exp = np.zeros((3, 1), dtype) - irr_func(exp, cts, obj, self.labels) - - # bin-based version - bins = np.array([3, 6], dtype=np.int64) - out = np.zeros((3, 1), dtype) - counts = np.zeros(len(out), dtype=np.int64) - bin_func(out, counts, obj, bins) - - assert_almost_equal(out, exp, check_less_precise=check_less_precise) - - bins = np.array([3, 9, 10], dtype=np.int64) - out = np.zeros((3, 1), dtype) - counts = np.zeros(len(out), dtype=np.int64) - bin_func(out, counts, obj, bins) - exp = np.array([np_func(obj[:3]), np_func(obj[3:9]), - np_func(obj[9:])], - dtype=dtype) - assert_almost_equal(out.squeeze(), exp, check_less_precise=check_less_precise) - - # duplicate bins - bins = np.array([3, 6, 10, 10], dtype=np.int64) - out = np.zeros((4, 1), dtype) - counts = np.zeros(len(out), dtype=np.int64) - bin_func(out, counts, obj, bins) - exp = np.array([np_func(obj[:3]), np_func(obj[3:6]), - np_func(obj[6:10]), np.nan], - dtype=dtype) - assert_almost_equal(out.squeeze(), exp, check_less_precise=check_less_precise) - def test_group_ohlc(): def _check(dtype): obj = np.array(np.random.randn(20),dtype=dtype) - bins = np.array([6, 12], dtype=np.int64) + bins = np.array([6, 12, 20], dtype=np.int64) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) + labels = np.repeat(np.arange(3), np.diff(np.r_[0, bins])) func = getattr(algos,'group_ohlc_%s' % dtype) - func(out, counts, obj[:, None], bins) + func(out, counts, obj[:, None], labels) def _ohlc(group): if isnull(group).all(): @@ -559,7 +500,7 @@ def _ohlc(group): assert_almost_equal(counts, [6, 6, 8]) obj[:6] = nan - func(out, counts, obj[:, None], bins) + func(out, counts, obj[:, None], labels) expected[0] = nan assert_almost_equal(out, expected) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 0bee6f514cad0..49d344631e4b9 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1596,7 +1596,7 @@ def test_aggregate_with_nat(self): normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - for func in ['min', 'max', 'prod']: + for func in ['min', 'max', 'sum', 'prod']: normal_result = getattr(normal_grouped, func)() dt_result = getattr(dt_grouped, func)() pad = DataFrame([[np.nan, np.nan, np.nan, np.nan]], @@ -1606,7 +1606,7 @@ def test_aggregate_with_nat(self): expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') assert_frame_equal(expected, dt_result) - for func in ['count', 'sum']: + for func in ['count']: normal_result = getattr(normal_grouped, func)() pad = DataFrame([[0, 0, 0, 0]], index=[3], columns=['A', 'B', 'C', 'D']) expected = normal_result.append(pad)