Skip to content

ENH: ExponentialMovingWindow.sum #43871

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Oct 6, 2021
1 change: 1 addition & 0 deletions doc/source/reference/window.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Exponentially-weighted window functions
:toctree: api/

ExponentialMovingWindow.mean
ExponentialMovingWindow.sum
ExponentialMovingWindow.std
ExponentialMovingWindow.var
ExponentialMovingWindow.corr
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ Other enhancements
- Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
- :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
- The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
-
- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)

.. ---------------------------------------------------------------------------

Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/window/aggregations.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def roll_weighted_var(
minp: int, # int64_t
ddof: int, # unsigned int
) -> np.ndarray: ... # np.ndarray[np.float64]
def ewma(
def ewm(
vals: np.ndarray, # const float64_t[:]
start: np.ndarray, # const int64_t[:]
end: np.ndarray, # const int64_t[:]
Expand All @@ -109,6 +109,7 @@ def ewma(
adjust: bool,
ignore_na: bool,
deltas: np.ndarray, # const float64_t[:]
normalize: bool,
) -> np.ndarray: ... # np.ndarray[np.float64]
def ewmcov(
input_x: np.ndarray, # const float64_t[:]
Expand Down
57 changes: 30 additions & 27 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1604,13 +1604,13 @@ def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,


# ----------------------------------------------------------------------
# Exponentially weighted moving average
# Exponentially weighted moving average and sum

def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
int minp, float64_t com, bint adjust, bint ignore_na,
const float64_t[:] deltas=None) -> np.ndarray:
def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
int minp, float64_t com, bint adjust, bint ignore_na,
const float64_t[:] deltas=None, bint normalize=True) -> np.ndarray:
"""
Compute exponentially-weighted moving average using center-of-mass.
Compute exponentially-weighted moving average or sum using center-of-mass.

Parameters
----------
Expand All @@ -1623,6 +1623,8 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
ignore_na : bool
deltas : ndarray (float64 type), optional. If None, implicitly assumes equally
spaced points (used when `times` is not passed)
normalize : bool, optional.
If True, calculate the mean. If False, calculate the sum.

Returns
-------
Expand All @@ -1634,7 +1636,7 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
const float64_t[:] sub_vals
const float64_t[:] sub_deltas=None
ndarray[float64_t] sub_output, output = np.empty(N, dtype=np.float64)
float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur
float64_t alpha, old_wt_factor, new_wt, weighted, old_wt, cur
bint is_observation, use_deltas

if N == 0:
Expand All @@ -1657,48 +1659,49 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
win_size = len(sub_vals)
sub_output = np.empty(win_size, dtype=np.float64)

weighted_avg = sub_vals[0]
is_observation = weighted_avg == weighted_avg
weighted = sub_vals[0]
is_observation = weighted == weighted
nobs = int(is_observation)
sub_output[0] = weighted_avg if nobs >= minp else NaN
sub_output[0] = weighted if nobs >= minp else NaN
old_wt = 1.

with nogil:
for i in range(1, win_size):
cur = sub_vals[i]
is_observation = cur == cur
nobs += is_observation
if weighted_avg == weighted_avg:
if weighted == weighted:

if is_observation or not ignore_na:
if use_deltas:
old_wt *= old_wt_factor ** sub_deltas[i - 1]
if normalize:
if use_deltas:
old_wt *= old_wt_factor ** sub_deltas[i - 1]
else:
old_wt *= old_wt_factor
else:
old_wt *= old_wt_factor
weighted = old_wt_factor * weighted
if is_observation:

# avoid numerical errors on constant series
if weighted_avg != cur:
weighted_avg = ((old_wt * weighted_avg) +
(new_wt * cur)) / (old_wt + new_wt)
if adjust:
old_wt += new_wt
if normalize:
# avoid numerical errors on constant series
if weighted != cur:
weighted = old_wt * weighted + new_wt * cur
weighted /= (old_wt + new_wt)
if adjust:
old_wt += new_wt
else:
old_wt = 1.
else:
old_wt = 1.
weighted += cur
elif is_observation:
weighted_avg = cur
weighted = cur

sub_output[i] = weighted_avg if nobs >= minp else NaN
sub_output[i] = weighted if nobs >= minp else NaN

output[s:e] = sub_output

return output


# ----------------------------------------------------------------------
# Exponentially weighted moving covariance


def ewmcov(const float64_t[:] input_x, const int64_t[:] start, const int64_t[:] end,
int minp, const float64_t[:] input_y, float64_t com, bint adjust,
bint ignore_na, bint bias) -> np.ndarray:
Expand Down
85 changes: 73 additions & 12 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@
window_agg_numba_parameters,
)
from pandas.core.window.numba_ import (
generate_ewma_numba_table_func,
generate_numba_ewma_func,
generate_numba_ewm_func,
generate_numba_ewm_table_func,
)
from pandas.core.window.online import (
EWMMeanState,
Expand Down Expand Up @@ -469,17 +469,21 @@ def aggregate(self, func, *args, **kwargs):
def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
if maybe_use_numba(engine):
if self.method == "single":
ewma_func = generate_numba_ewma_func(
engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
)
numba_cache_key = (lambda x: x, "ewma")
func = generate_numba_ewm_func
numba_cache_key = (lambda x: x, "ewm_mean")
else:
ewma_func = generate_ewma_numba_table_func(
engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas
)
numba_cache_key = (lambda x: x, "ewma_table")
func = generate_numba_ewm_table_func
numba_cache_key = (lambda x: x, "ewm_mean_table")
ewm_func = func(
engine_kwargs=engine_kwargs,
com=self._com,
adjust=self.adjust,
ignore_na=self.ignore_na,
deltas=self._deltas,
normalize=True,
)
return self._apply(
ewma_func,
ewm_func,
numba_cache_key=numba_cache_key,
)
elif engine in ("cython", None):
Expand All @@ -489,11 +493,68 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):

deltas = None if self.times is None else self._deltas
window_func = partial(
window_aggregations.ewma,
window_aggregations.ewm,
com=self._com,
adjust=self.adjust,
ignore_na=self.ignore_na,
deltas=deltas,
normalize=True,
)
return self._apply(window_func)
else:
raise ValueError("engine must be either 'numba' or 'cython'")

@doc(
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
kwargs_compat,
create_section_header("Returns"),
template_returns,
create_section_header("See Also"),
template_see_also,
create_section_header("Notes"),
numba_notes.replace("\n", "", 1),
window_method="ewm",
aggregation_description="(exponential weighted moment) sum",
agg_method="sum",
)
def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
if not self.adjust:
raise NotImplementedError("sum is not implemented with adjust=False")
if maybe_use_numba(engine):
if self.method == "single":
func = generate_numba_ewm_func
numba_cache_key = (lambda x: x, "ewm_sum")
else:
func = generate_numba_ewm_table_func
numba_cache_key = (lambda x: x, "ewm_sum_table")
ewm_func = func(
engine_kwargs=engine_kwargs,
com=self._com,
adjust=self.adjust,
ignore_na=self.ignore_na,
deltas=self._deltas,
normalize=False,
)
return self._apply(
ewm_func,
numba_cache_key=numba_cache_key,
)
elif engine in ("cython", None):
if engine_kwargs is not None:
raise ValueError("cython engine does not accept engine_kwargs")
nv.validate_window_func("sum", args, kwargs)

deltas = None if self.times is None else self._deltas
window_func = partial(
window_aggregations.ewm,
com=self._com,
adjust=self.adjust,
ignore_na=self.ignore_na,
deltas=deltas,
normalize=False,
)
return self._apply(window_func)
else:
Expand Down
Loading