Skip to content
92 changes: 57 additions & 35 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11101,44 +11101,66 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
else:
axis = self._get_axis_number(axis)

y = com.values_from_object(self).copy()
d = self._construct_axes_dict()
d["copy"] = False
if axis == 1:
return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T

def na_accum_func(blk_values):
# Apply the accumulation `accum_func` to the values of a single block.
#
# `accum_func`, `axis`, `skipna`, `mask_a` and `mask_b` are not defined in
# this function; they are read from the enclosing scope.
#
# Three paths are taken depending on the dtype of `blk_values`:
#   1. datetime-like values (dtype kind "m" or "M"): accumulate on the
#      int64 view and put NaT markers back afterwards;
#   2. `skipna` with a dtype that is neither integer nor bool: replace
#      missing entries with `mask_a` before accumulating and write
#      `mask_b` into those positions of the result;
#   3. everything else: accumulate on the transposed values directly.
#
# NOTE(review): in this diff view the function's `return` statement is not
# adjacent to this body; it appears further down, after the removed lines.
# We will be applying this function to block values
if blk_values.dtype.kind in ["m", "M"]:
# numpy 1.18 started sorting NaTs at the end instead of beginning,
# so we need to work around to maintain backwards-consistency.
orig_dtype = blk_values.dtype

# We need to define mask before masking NaTs
mask = isna(blk_values)

if accum_func == np.minimum.accumulate:
# Note: the accum_func comparison fails as an "is" comparison
# The missing positions are overwritten with the largest int64.
# The write goes through `y`, an i8 view of `blk_values`; the
# `changed` flag records that those positions must be reset to iNaT
# once the accumulation is done.
# NOTE(review): presumably the view shares memory with `blk_values`
# (hence the restore step below) - confirm for DatetimeArray.
y = blk_values.view("i8")
y[mask] = np.iinfo(np.int64).max
changed = True
else:
y = blk_values
changed = False

# NOTE(review): unlike the two non-datetime branches below, this call
# does not transpose its input before accumulating - confirm intended.
result = accum_func(y.view("i8"), axis)
if skipna:
# Positions that were missing in the input are marked iNaT in the result.
np.putmask(result, mask, iNaT)
elif accum_func == np.minimum.accumulate:
# Restore NaTs that we masked previously
# `nz` holds the first array returned by nonzero() on the inverted mask.
# NOTE(review): for 2-D input that is the first-axis index only, and
# the slice below then covers whole leading rows - TODO confirm.
nz = (~np.asarray(mask)).nonzero()[0]
if len(nz):
# everything up to the first non-na entry stays NaT
result[: nz[0]] = iNaT

if changed:
# restore NaT elements
y[mask] = iNaT # TODO: could try/finally for this?

# Convert the int64 result back to the original datetime-like dtype.
if isinstance(blk_values, np.ndarray):
result = result.view(orig_dtype)
else:
# DatetimeArray
result = type(blk_values)._from_sequence(result, dtype=orig_dtype)

elif skipna and not issubclass(
blk_values.dtype.type, (np.integer, np.bool_)
):
# Work on a transposed copy so the fill below does not touch `blk_values`.
vals = blk_values.copy().T
mask = isna(vals)
# Fill missing entries with `mask_a` before accumulating, then write
# `mask_b` into the same positions of the accumulated result.
np.putmask(vals, mask, mask_a)
result = accum_func(vals, axis)
np.putmask(result, mask, mask_b)
else:
# Integer/bool dtypes, or skipna is False: no masking is applied here.
result = accum_func(blk_values.T, axis)

if issubclass(y.dtype.type, (np.datetime64, np.timedelta64)):
# numpy 1.18 started sorting NaTs at the end instead of beginning,
# so we need to work around to maintain backwards-consistency.
orig_dtype = y.dtype
if accum_func == np.minimum.accumulate:
# Note: the accum_func comparison fails as an "is" comparison
# Note that "y" is always a copy, so we can safely modify it
mask = isna(self)
y = y.view("i8")
y[mask] = np.iinfo(np.int64).max

result = accum_func(y.view("i8"), axis).view(orig_dtype)
if skipna:
mask = isna(self)
np.putmask(result, mask, iNaT)
elif accum_func == np.minimum.accumulate:
# Restore NaTs that we masked previously
nz = (~np.asarray(mask)).nonzero()[0]
if len(nz):
# everything up to the first non-na entry stays NaT
result[: nz[0]] = iNaT
# transpose back for ndarray, not for EA
return result.T if hasattr(result, "T") else result

if self.ndim == 1:
# restore dt64tz dtype
d["dtype"] = self.dtype

elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
mask = isna(self)
np.putmask(y, mask, mask_a)
result = accum_func(y, axis)
np.putmask(result, mask, mask_b)
else:
result = accum_func(y, axis)
result = self._data.apply(na_accum_func)

d = self._construct_axes_dict()
d["copy"] = False
return self._constructor(result, **d).__finalize__(self)

return set_function_name(cum_func, name, cls)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,10 @@ def apply(self, f: str, filter=None, **kwargs):
axis = obj._info_axis_number
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)

applied = getattr(b, f)(**kwargs)
if callable(f):
applied = b.apply(f, **kwargs)
else:
applied = getattr(b, f)(**kwargs)
result_blocks = _extend_blocks(applied, result_blocks)

if len(result_blocks) == 0:
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1331,8 +1331,8 @@ def test_agg_cython_table(self, df, func, expected, axis):
_get_cython_table_params(
DataFrame([[np.nan, 1], [1, 2]]),
[
("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])),
("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])),
("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
],
),
),
Expand All @@ -1341,6 +1341,10 @@ def test_agg_cython_table_transform(self, df, func, expected, axis):
# GH 21224
# test transforming functions in
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
if axis == "columns" or axis == 1:
# operating blockwise doesn't let us preserve dtypes
expected = expected.astype("float64")

result = df.agg(func, axis=axis)
tm.assert_frame_equal(result, expected)

Expand Down