@@ -1160,75 +1160,68 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
1160
1160
"""
1161
1161
Numerically stable implementation using Welford's method.
1162
1162
"""
1163
- cdef double val, prev, mean_x = 0 , ssqdm_x = 0 , nobs = 0 , delta
1164
- cdef Py_ssize_t i
1163
+ cdef double val, prev, mean_x = 0 , ssqdm_x = 0 , delta, rep = NaN
1164
+ cdef Py_ssize_t nobs = 0 , nrep = 0 , i
1165
1165
cdef Py_ssize_t N = len (input )
1166
1166
1167
1167
cdef ndarray[double_t] output = np.empty(N, dtype = float )
1168
1168
1169
1169
minp = _check_minp(win, minp, N)
1170
1170
1171
- # Check for windows larger than array, addresses #7297
1172
- win = min (win, N)
1173
-
1174
- # Over the first window, observations can only be added, never removed
1175
- for i from 0 <= i < win:
1171
+ for i from 0 <= i < N:
1176
1172
val = input [i]
1173
+ prev = NaN if i < win else input [i - win]
1174
+
1175
+ # First, count the number of observations and consecutive repeats
1176
+ if prev == prev:
1177
+ # prev is not NaN, removing an observation...
1178
+ if nobs == nrep:
1179
+ # ...and removing a repeat
1180
+ nrep -= 1
1181
+ if nrep == 0 :
1182
+ rep = NaN
1183
+ nobs -= 1
1177
1184
1178
- # Not NaN
1179
1185
if val == val:
1180
- nobs += 1
1181
- delta = (val - mean_x)
1182
- mean_x += delta / nobs
1183
- ssqdm_x += delta * (val - mean_x)
1184
-
1185
- if nobs >= minp:
1186
- # pathological case
1187
- if nobs == 1 :
1188
- val = 0
1186
+ # next is not NaN, adding an observation...
1187
+ if val == prev:
1188
+ # ...and adding a repeat
1189
+ nrep += 1
1189
1190
else :
1190
- val = ssqdm_x / (nobs - ddof)
1191
- if val < 0 :
1192
- val = 0
1193
- else :
1194
- val = NaN
1195
-
1196
- output[i] = val
1197
-
1198
- # After the first window, observations can both be added and removed
1199
- for i from win <= i < N:
1200
- val = input [i]
1201
- prev = input [i - win]
1191
+ # ...and resetting repeats
1192
+ nrep = 1
1193
+ rep = val
1194
+ nobs += 1
1202
1195
1203
- if val == val:
1196
+ # Then, compute the new mean and sum of squared differences
1197
+ if nobs == nrep:
1198
+ # All non-NaN values in window are identical...
1199
+ ssqdm_x = 0
1200
+ mean_x = rep if nobs > 0 else 0
1201
+ elif val == val:
1202
+ # Adding one observation...
1204
1203
if prev == prev:
1205
- # Adding one observation and removing another one
1204
+ # ... and removing another
1206
1205
delta = val - prev
1207
1206
prev -= mean_x
1208
1207
mean_x += delta / nobs
1209
1208
val -= mean_x
1210
1209
ssqdm_x += (val + prev) * delta
1211
1210
else :
1212
- # Adding one observation and not removing any
1213
- nobs += 1
1211
+ # ...and not removing any
1214
1212
delta = (val - mean_x)
1215
1213
mean_x += delta / nobs
1216
1214
ssqdm_x += delta * (val - mean_x)
1217
1215
elif prev == prev:
1218
1216
# Adding no new observation, but removing one
1219
- nobs -= 1
1220
- if nobs:
1221
- delta = (prev - mean_x)
1222
- mean_x -= delta / nobs
1223
- ssqdm_x -= delta * (prev - mean_x)
1224
- else :
1225
- mean_x = 0
1226
- ssqdm_x = 0
1217
+ delta = (prev - mean_x)
1218
+ mean_x -= delta / nobs
1219
+ ssqdm_x -= delta * (prev - mean_x)
1227
1220
# Variance is unchanged if no observation is added or removed
1228
1221
1222
+ # Finally, compute and write the rolling variance to the output array
1229
1223
if nobs >= minp:
1230
- # pathological case
1231
- if nobs == 1 :
1224
+ if nobs <= ddof:
1232
1225
val = 0
1233
1226
else :
1234
1227
val = ssqdm_x / (nobs - ddof)
0 commit comments