Skip to content

Commit 05123af

Browse files
committed
ENH: cumsum/cumprod refactor, consistency
1 parent 3ce46e4 commit 05123af

File tree

6 files changed

+144
-64
lines changed

6 files changed

+144
-64
lines changed

pandas/core/frame.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,31 +1959,6 @@ def sum(self, axis=0, numeric_only=False):
19591959

19601960
return Series(the_sum, index=axis_labels)
19611961

1962-
def cumsum(self, axis=0):
1963-
"""
1964-
Return DataFrame of cumulative sums over requested axis.
1965-
1966-
Parameters
1967-
----------
1968-
axis : {0, 1}
1969-
0 for row-wise, 1 for column-wise
1970-
1971-
Returns
1972-
-------
1973-
y : DataFrame
1974-
"""
1975-
y = np.array(self.values, subok=True)
1976-
if not issubclass(y.dtype.type, np.int_):
1977-
mask = np.isnan(self.values)
1978-
y[mask] = 0
1979-
result = y.cumsum(axis)
1980-
has_obs = (-mask).astype(int).cumsum(axis) > 0
1981-
result[-has_obs] = np.nan
1982-
else:
1983-
result = y.cumsum(axis)
1984-
return type(self)(result, index=self.index, columns=self.columns,
1985-
copy=False)
1986-
19871962
def min(self, axis=0):
19881963
"""
19891964
Return array or Series of minimums over requested axis.
@@ -2018,30 +1993,6 @@ def max(self, axis=0):
20181993
np.putmask(values, -np.isfinite(values), -np.inf)
20191994
return Series(values.max(axis), index=self._get_agg_axis(axis))
20201995

2021-
def cumprod(self, axis=0):
2022-
"""
2023-
Return cumulative product over requested axis as DataFrame
2024-
2025-
Parameters
2026-
----------
2027-
axis : {0, 1}
2028-
0 for row-wise, 1 for column-wise
2029-
2030-
Returns
2031-
-------
2032-
y : DataFrame
2033-
"""
2034-
def get_cumprod(y):
2035-
y = np.array(y)
2036-
mask = isnull(y)
2037-
if not issubclass(y.dtype.type, np.int_):
2038-
y[mask] = 1
2039-
result = y.cumprod()
2040-
2041-
return result
2042-
2043-
return self.apply(get_cumprod, axis=axis)
2044-
20451996
def product(self, axis=0):
20461997
"""
20471998
Return array or Series of products over requested axis.

pandas/core/generic.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,52 @@ def _reindex_axis(self, new_index, fill_method, axis):
166166
new_data = self._data.reindex_axis(new_index, axis=axis,
167167
method=fill_method)
168168
return type(self)(new_data)
169+
170+
def cumsum(self, axis=0):
    """
    Return cumulative sums over the requested axis.

    For non-integer data, NaN entries are treated as 0 while accumulating
    and are set back to NaN at their original positions in the result.
    Integer data is accumulated directly.

    Parameters
    ----------
    axis : {0, 1}
        0 for row-wise, 1 for column-wise

    Returns
    -------
    y : same type as the caller
        Constructed via ``type(self)`` with the caller's index and columns.
    """
    y = self.values.copy()
    # np.integer matches every integer width and signedness. Testing against
    # np.int_ only matched the platform-default integer, so e.g. int32 data
    # was sent through the NaN-masking path and had NaN put into an integer
    # result array.
    if issubclass(y.dtype.type, np.integer):
        result = y.cumsum(axis)
    else:
        # Compute the mask from the copy before it is overwritten; this
        # avoids fetching self.values a second time.
        mask = np.isnan(y)
        np.putmask(y, mask, 0.)
        result = y.cumsum(axis)
        # Restore the missing entries so NaN positions are preserved.
        np.putmask(result, mask, np.nan)
    return type(self)(result, index=self.index, columns=self.columns,
                      copy=False)
193+
194+
def cumprod(self, axis=0):
    """
    Return cumulative products over the requested axis.

    For non-integer data, NaN entries are treated as 1 while accumulating
    and are set back to NaN at their original positions in the result.
    Integer data is accumulated directly.

    Parameters
    ----------
    axis : {0, 1}
        0 for row-wise, 1 for column-wise

    Returns
    -------
    y : same type as the caller
        Constructed via ``type(self)`` with the caller's index and columns.
    """
    y = self.values.copy()
    # np.integer matches every integer width and signedness. Testing against
    # np.int_ only matched the platform-default integer, so e.g. int32 data
    # was sent through the NaN-masking path and had NaN put into an integer
    # result array.
    if issubclass(y.dtype.type, np.integer):
        result = y.cumprod(axis)
    else:
        # Compute the mask from the copy before it is overwritten; this
        # avoids fetching self.values a second time.
        mask = np.isnan(y)
        np.putmask(y, mask, 1.)
        result = y.cumprod(axis)
        # Restore the missing entries so NaN positions are preserved.
        np.putmask(result, mask, np.nan)
    return type(self)(result, index=self.index, columns=self.columns,
                      copy=False)
217+

pandas/core/series.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -577,21 +577,37 @@ def cumsum(self, axis=0, dtype=None, out=None):
577577
-------
578578
579579
"""
580-
arr = self.copy()
581-
okLocs = notnull(arr)
582-
result = np.cumsum(arr.view(ndarray)[okLocs])
583-
arr = arr.astype(result.dtype)
584-
arr[okLocs] = result
585-
return arr
580+
arr = self.values.copy()
581+
582+
do_mask = not issubclass(self.dtype.type, np.int_)
583+
if do_mask:
584+
mask = isnull(arr)
585+
np.putmask(arr, mask, 0.)
586+
587+
result = arr.cumsum()
588+
589+
if do_mask:
590+
np.putmask(result, mask, np.nan)
591+
592+
return Series(result, index=self.index)
586593

587594
def cumprod(self, axis=0, dtype=None, out=None):
    """
    Cumulative product of values, overriding numpy's built-in cumprod.

    For non-integer data, null entries are treated as 1 while accumulating
    and are set back to NaN at their original positions in the result.

    The axis, dtype and out parameters are accepted but not used by this
    implementation.

    Returns
    -------
    cumprod : Series
        Carries the same index as the caller.
    """
    arr = self.values.copy()

    # np.integer matches every integer width and signedness; np.int_ only
    # matched the platform-default integer, so e.g. int32 data was masked
    # and had NaN put into an integer result array.
    do_mask = not issubclass(self.dtype.type, np.integer)
    if do_mask:
        mask = isnull(arr)
        np.putmask(arr, mask, 1.)

    result = arr.cumprod()

    if do_mask:
        # Restore the missing entries so null positions are preserved.
        np.putmask(result, mask, np.nan)

    return Series(result, index=self.index)
595611

596612
def median(self):
597613
"""

pandas/core/sparse.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import operator
1212

13-
from pandas.core.common import (isnull, _pickle_array, _unpickle_array,
13+
from pandas.core.common import (isnull, notnull, _pickle_array, _unpickle_array,
1414
_mut_exclusive, _ensure_index, _try_sort)
1515
from pandas.core.index import Index, NULL_INDEX
1616
from pandas.core.series import Series, TimeSeries
@@ -510,6 +510,23 @@ def sum(self, axis=None, dtype=None, out=None):
510510
nsparse = self.sp_index.npoints
511511
return sp_sum + self.fill_value * nsparse
512512

513+
def cumsum(self, axis=0, dtype=None, out=None):
    """
    Cumulative sum of values. Preserves NaN values

    Extra parameters are to preserve ndarray interface.

    Returns
    -------
    cumsum : SparseSeries when fill_value is NaN; otherwise whatever
        cumsum on the dense form (``to_dense()``) returns
    """
    if np.isnan(self.fill_value):
        # NaN fill: accumulate over the stored points only and reuse the
        # existing index, sparse index and fill value.
        running = self.sp_values.cumsum()
        return SparseSeries(running,
                            index=self.index,
                            sparse_index=self.sp_index,
                            fill_value=self.fill_value)
    # Non-NaN fill: fall back to the dense representation's cumsum.
    dense = self.to_dense()
    return dense.cumsum()
529+
513530
def mean(self, axis=None, dtype=None, out=None):
514531
"""
515532
Mean of non-null values
@@ -1057,6 +1074,21 @@ def count(self, axis=0, **kwds):
10571074
"""
10581075
return self.apply(SparseSeries.count, axis=axis)
10591076

1077+
def cumsum(self, axis=0):
    """
    Return SparseDataFrame of cumulative sums over requested axis.

    Implemented by applying SparseSeries.cumsum along the given axis.

    Parameters
    ----------
    axis : {0, 1}
        0 for row-wise, 1 for column-wise

    Returns
    -------
    y : SparseDataFrame
    """
    accumulate = SparseSeries.cumsum
    return self.apply(accumulate, axis=axis)
1091+
10601092
def shift(self, periods, offset=None, timeRule=None):
10611093
"""
10621094
Analogous to DataFrame.shift

pandas/tests/test_frame.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2269,22 +2269,43 @@ def test_quantile(self):
22692269
self.assertEqual(q['A'], scoreatpercentile(self.intframe['A'], 10))
22702270

22712271
def test_cumsum(self):
    """Frame cumsum must match applying Series.cumsum along each axis."""
    # punch NaN holes into three different columns
    self.tsframe.ix[5:10, 0] = nan
    self.tsframe.ix[10:15, 1] = nan
    self.tsframe.ix[15:, 2] = nan

    # axis = 0
    by_rows = self.tsframe.cumsum()
    by_rows_expected = self.tsframe.apply(Series.cumsum)
    assert_frame_equal(by_rows, by_rows_expected)

    # axis = 1
    by_cols = self.tsframe.cumsum(axis=1)
    by_cols_expected = self.tsframe.apply(Series.cumsum, axis=1)
    assert_frame_equal(by_cols, by_cols_expected)

    # works: integer data only needs to run without raising
    int_frame = self.klass({'A' : np.arange(20)}, index=np.arange(20))
    result = int_frame.cumsum()

    # fix issue: the axis=1 result keeps the shape of the input
    cumsum_xs = self.tsframe.cumsum(axis=1)
    got_shape = np.shape(cumsum_xs)
    want_shape = np.shape(self.tsframe)
    self.assertEqual(got_shape, want_shape)
22822293

2294+
22832295
def test_cumprod(self):
2296+
self.tsframe.ix[5:10, 0] = nan
2297+
self.tsframe.ix[10:15, 1] = nan
2298+
self.tsframe.ix[15:, 2] = nan
2299+
2300+
# axis = 0
22842301
cumprod = self.tsframe.cumprod()
2302+
expected = self.tsframe.apply(Series.cumprod)
2303+
assert_frame_equal(cumprod, expected)
22852304

2286-
assert_series_equal(cumprod['A'],
2287-
np.cumprod(self.tsframe['A'].fillna(1)))
2305+
# axis = 1
2306+
cumprod = self.tsframe.cumprod(axis=1)
2307+
expected = self.tsframe.apply(Series.cumprod, axis=1)
2308+
assert_frame_equal(cumprod, expected)
22882309

22892310
# fix issue
22902311
cumprod_xs = self.tsframe.cumprod(axis=1)

pandas/tests/test_sparse.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,11 @@ def test_shift(self):
575575
f = lambda s: s.shift(2, offset=datetools.bday)
576576
_dense_series_compare(series, f)
577577

578+
def test_cumsum(self):
    """Sparse cumsum stays sparse and agrees with the dense computation."""
    sparse_total = self.bseries.cumsum()
    dense_total = self.bseries.to_dense().cumsum()
    stays_sparse = isinstance(sparse_total, SparseSeries)
    self.assert_(stays_sparse)
    assert_series_equal(sparse_total.to_dense(), dense_total)
578583

579584
class TestSparseTimeSeries(TestCase):
    """Placeholder test case; it defines no tests of its own here."""
@@ -1066,6 +1071,12 @@ def test_count(self):
10661071
dense_result = self.frame.to_dense().count(1)
10671072
assert_series_equal(result, dense_result)
10681073

1074+
def test_cumsum(self):
    """Sparse frame cumsum stays sparse and agrees with the dense result."""
    sparse_total = self.frame.cumsum()
    dense_total = self.frame.to_dense().cumsum()
    stays_sparse = isinstance(sparse_total, SparseDataFrame)
    self.assert_(stays_sparse)
    assert_frame_equal(sparse_total.to_dense(), dense_total)
1079+
10691080
def _check_all(self, check_func):
10701081
check_func(self.frame)
10711082
check_func(self.iframe)

0 commit comments

Comments
 (0)