Skip to content

Fix: Pandas rolling removes imaginary part of complex #47028

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import numpy as np

cimport numpy as cnp
from numpy cimport (
complex64_t,
float32_t,
float64_t,
int64_t,
Expand All @@ -26,6 +27,11 @@ from pandas._libs.algos import is_monotonic

from pandas._libs.dtypes cimport numeric_t

from pandas.core.dtypes.common import ensure_float64

ctypedef fused float_complex_t:
float64_t
complex64_t

cdef extern from "../src/skiplist.h":
ctypedef struct node_t:
Expand Down Expand Up @@ -129,9 +135,10 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
sum_x[0] = t


def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
def roll_sum(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
cdef:
float64_t[:] values = ensure_float64(float_complex_values)
Py_ssize_t i, j
float64_t sum_x, compensation_add, compensation_remove, prev_value
int64_t s, e, num_consecutive_same_value
Expand Down Expand Up @@ -234,7 +241,6 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
num_consecutive_same_value[0] = 1
prev_value[0] = val


cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
Py_ssize_t *neg_ct, float64_t *compensation) nogil:
""" remove a value from the mean calc using Kahan summation """
Expand All @@ -251,9 +257,10 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
neg_ct[0] = neg_ct[0] - 1


def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
def roll_mean(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
cdef:
float64_t[:] values = ensure_float64(float_complex_values)
float64_t val, compensation_add, compensation_remove, sum_x, prev_value
int64_t s, e, num_consecutive_same_value
Py_ssize_t nobs, i, j, neg_ct, N = len(start)
Expand Down Expand Up @@ -387,12 +394,13 @@ cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x,
ssqdm_x[0] = 0


def roll_var(const float64_t[:] values, ndarray[int64_t] start,
def roll_var(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp, int ddof=1) -> np.ndarray:
"""
Numerically stable implementation using Welford's method.
"""
cdef:
float64_t[:] values = ensure_float64(float_complex_values)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is a suitable approach since it defeats the purpose of using a memory view in the input.

float64_t mean_x, ssqdm_x, nobs, compensation_add,
float64_t compensation_remove, prev_value
int64_t s, e, num_consecutive_same_value
Expand Down Expand Up @@ -562,9 +570,10 @@ cdef inline void remove_skew(float64_t val, int64_t *nobs,
xxx[0] = t


def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
def roll_skew(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
cdef:
float64_t[:] values = ensure_float64(float_complex_values)
Py_ssize_t i, j
float64_t val, prev, min_val, mean_val, sum_val = 0
float64_t compensation_xxx_add, compensation_xxx_remove
Expand Down Expand Up @@ -775,9 +784,10 @@ cdef inline void remove_kurt(float64_t val, int64_t *nobs,
xxxx[0] = t


def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
def roll_kurt(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
cdef:
float64_t[:] values = ensure_float64(float_complex_values)
Py_ssize_t i, j
float64_t val, prev, mean_val, min_val, sum_val = 0
float64_t compensation_xxxx_add, compensation_xxxx_remove
Expand Down Expand Up @@ -869,9 +879,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
# Rolling median, min, max


def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
def roll_median_c(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
cdef:
float64_t[:] values = ensure_float64(float_complex_values)
Py_ssize_t i, j
bint err = False, is_monotonic_increasing_bounds
int midpoint, ret = 0
Expand Down Expand Up @@ -1010,7 +1021,7 @@ cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs,
return result


def roll_max(ndarray[float64_t] values, ndarray[int64_t] start,
def roll_max(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
"""
Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.
Expand All @@ -1031,10 +1042,12 @@ def roll_max(ndarray[float64_t] values, ndarray[int64_t] start,
-------
np.ndarray[float]
"""
cdef:
ndarray[float64_t] values = ensure_float64(float_complex_values)
return _roll_min_max(values, start, end, minp, is_max=1)


def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
def roll_min(float_complex_t[:] float_complex_values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
"""
Moving min of 1d array of any numeric type along axis=0 ignoring NaNs.
Expand All @@ -1052,6 +1065,8 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
-------
np.ndarray[float]
"""
cdef:
ndarray[float64_t] values = ensure_float64(float_complex_values)
return _roll_min_max(values, start, end, minp, is_max=0)


Expand Down
6 changes: 5 additions & 1 deletion pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from pandas.core.dtypes.common import (
ensure_float64,
is_bool,
is_complex_dtype,
is_integer,
is_list_like,
is_scalar,
Expand Down Expand Up @@ -363,7 +364,10 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray:
if isinstance(values, ExtensionArray):
values = values.to_numpy(np.float64, na_value=np.nan)
else:
values = ensure_float64(values)
if is_complex_dtype(values):
values = values.astype(np.complex64)
else:
values = ensure_float64(values)
except (ValueError, TypeError) as err:
raise TypeError(f"cannot handle this type -> {values.dtype}") from err

Expand Down
16 changes: 15 additions & 1 deletion pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,6 @@ def test_rolling_axis_count(axis_frame):
def test_readonly_array():
# GH-27766
arr = np.array([1, 3, np.nan, 3, 5])
arr.setflags(write=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line can't be removed: marking the array read-only is the whole point of this test.

result = Series(arr).rolling(2).mean()
expected = Series([np.nan, 2, np.nan, np.nan, 4])
tm.assert_series_equal(result, expected)
Expand Down Expand Up @@ -1871,3 +1870,18 @@ def test_rolling_skew_kurt_floating_artifacts():
assert (result[-2:] == 0).all()
result = r.kurt()
assert (result[-2:] == -3).all()


def test_rolling_imaginary_part_of_complex(arithmetic_win_operators):
    # GH 46619
    # Apply the same window-2 rolling operation to a complex-valued frame and
    # to a frame holding only the real parts of those values, then require
    # that both produce the same output.
    op_name = arithmetic_win_operators

    complex_roller = DataFrame([1j, 1 + 2j]).rolling(2)
    result = getattr(complex_roller, op_name)()

    real_roller = DataFrame([0, 1]).rolling(2)
    expected = getattr(real_roller, op_name)()

    tm.assert_frame_equal(result, expected)