@@ -1253,6 +1253,166 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
12531253 return result
12541254
12551255
1256+ # ----------------------------------------------------------------------
1257+ # Rolling correlation
1258+
1259+ def roll_corr (ndarray[double_t] x , ndarray[double_t] y , int win , int minp ,
1260+ int ddof = 1 ):
1261+ """
1262+ Numerically stable implementation using a Welford-like method, and
1263+ detection of exactly matching sequences and exactly zero denominators.
1264+ """
1265+ cdef double val_x, val_y, prev_x, prev_y, rep_x = NaN, rep_y = NaN
1266+ cdef double mean_x = 0 , mean_y = 0 , ssqdm_x = 0 , ssqdm_y = 0
1267+ cdef double sproddm_xy = 0 , delta_x, delta_y
1268+ cdef Py_ssize_t nobs = 0 , nrep_x = 0 , nrep_y = 0 , ndup = 0 , i, N = len (x)
1269+ cdef bint val_not_nan, prev_not_nan
1270+
1271+ cdef ndarray[double_t] output = np.empty(N, dtype = float )
1272+
1273+ minp = _check_minp(win, minp, N)
1274+
1275+ for i from 0 <= i < N:
1276+ val_x = x[i]
1277+ val_y = y[i]
1278+ val_not_nan = val_x == val_x and val_y == val_y
1279+ if i < win:
1280+ prev_x = prev_y = NaN
1281+ prev_not_nan = 0
1282+ else :
1283+ prev_x = x[i - win]
1284+ prev_y = y[i - win]
1285+ prev_not_nan = prev_x == prev_x and prev_y == prev_y
1286+
1287+ # First, count the number of observations and consecutive repeats
1288+ if prev_not_nan:
1289+ # prev is not NaN, removing an observation...
1290+ if nobs == nrep_x:
1291+ # ...and removing a repeat from x
1292+ nrep_x -= 1
1293+ if nrep_x == 0 :
1294+ rep_x = NaN
1295+ if nobs == nrep_y:
1296+ # ...and removing a repeat from y
1297+ nrep_y -= 1
1298+ if nrep_y == 0 :
1299+ rep_y = NaN
1300+ if nobs == ndup:
1301+ # ...and removing a duplicate
1302+ ndup -= 1
1303+ nobs -= 1
1304+ if val_not_nan:
1305+ # val is not NaN, adding an observation
1306+ if val_x == rep_x:
1307+ # ...and adding a repeat to x
1308+ nrep_x += 1
1309+ else :
1310+ # ...and resetting x repeats
1311+ nrep_x = 1
1312+ rep_x = val_x
1313+ if val_y == rep_y:
1314+ # ...and adding a repeat to y
1315+ nrep_y += 1
1316+ else :
1317+ # ...and resetting y repeats
1318+ nrep_y = 1
1319+ rep_y = val_y
1320+ if val_x == val_y:
1321+ # ...and adding a duplicate
1322+ ndup += 1
1323+ else :
1324+ # ...and resetting duplicates
1325+ ndup = 0
1326+ nobs += 1
1327+
1328+ # Then, compute the new mean, sums of squared differences from the
1329+ # mean and sum of product of differences from the mean
1330+ if nobs == nrep_x and nobs == nrep_y:
1331+ if nobs > 0 :
1332+ mean_x = rep_x
1333+ mean_y = rep_y
1334+ else :
1335+ mean_x = mean_y = 0
1336+ ssqdm_x = ssqdm_y = sproddm_xy = 0
1337+ elif val_not_nan:
1338+ # Adding one observation...
1339+ if prev_not_nan:
1340+ # ...and removing another
1341+ delta_x = val_x - prev_x
1342+ prev_x -= mean_x
1343+ if nobs == nrep_x:
1344+ mean_x = rep_x
1345+ val_x = 0
1346+ ssqdm_x = 0
1347+ else :
1348+ mean_x += delta_x / nobs
1349+ val_x -= mean_x
1350+ ssqdm_x += (val_x + prev_x) * delta_x
1351+ delta_y = val_y - prev_y
1352+ prev_y -= mean_y
1353+ if nobs == nrep_y:
1354+ mean_y = rep_y
1355+ val_y = 0
1356+ ssqdm_y = 0
1357+ else :
1358+ mean_y += delta_y / nobs
1359+ val_y -= mean_y
1360+ ssqdm_y += (val_y + prev_y) * delta_y
1361+ sproddm_xy += (delta_x * (val_y + prev_y) +
1362+ delta_y * (val_x + prev_x)) * 0.5
1363+ else :
1364+ # ...and not removing any
1365+ if nobs == nrep_x:
1366+ delta_x = 0
1367+ mean_x = rep_x
1368+ ssqdm_x = 0
1369+ else :
1370+ delta_x = val_x - mean_x
1371+ mean_x += delta_x / nobs
1372+ ssqdm_x += delta_x * (val_x - mean_x)
1373+ if nobs == nrep_y:
1374+ delta_y = 0
1375+ mean_y = rep_y
1376+ ssqdm_y = 0
1377+ else :
1378+ delta_y = val_y - mean_y
1379+ mean_y += delta_y / nobs
1380+ ssqdm_y += delta_y * (val_y - mean_y)
1381+ sproddm_xy += ((val_x - mean_x) * delta_y +
1382+ (val_y - mean_y) * delta_x) * 0.5
1383+ elif prev_not_nan:
1384+ # Adding no new observation, but removing one
1385+ if nobs == nrep_x:
1386+ delta_x = 0
1387+ mean_x = rep_x
1388+ ssqdm_x = 0
1389+ else :
1390+ delta_x = prev_x - mean_x
1391+ mean_x -= delta_x / nobs
1392+ ssqdm_x -= delta_x * (prev_x - mean_x)
1393+ if nobs == nrep_y:
1394+ delta_y = 0
1395+ mean_y = rep_y
1396+ ssqdm_y = 0
1397+ else :
1398+ delta_y = prev_y - mean_y
1399+ mean_y -= delta_y / nobs
1400+ ssqdm_y -= delta_y * (prev_y - mean_y)
1401+ sproddm_xy -= ((prev_x - mean_x) * delta_y +
1402+ (prev_y - mean_y) * delta_x) * 0.5
1403+ # Correlation is unchanged if no observation is added or removed
1404+
1405+ # Finally, compute and write the rolling correlation to output
1406+ if nobs < minp or nobs < ddof or ssqdm_x <= 0 or ssqdm_y <= 0 :
1407+ output[i] = NaN
1408+ elif nobs == ndup:
1409+ output[i] = 1.0
1410+ else :
1411+ output[i] = sproddm_xy / sqrt(ssqdm_x * ssqdm_y)
1412+
1413+ return output
1414+
1415+
12561416# ----------------------------------------------------------------------
12571417# Rolling covariance
12581418
@@ -1261,8 +1421,8 @@ def roll_cov(ndarray[double_t] x, ndarray[double_t] y, int win, int minp,
12611421 """
12621422 Numerically stable implementation using a Welford-like method.
12631423 """
1264- cdef double val_x, val_y, prev_x, prev_y, delta_x, delta_y, temp_x, temp_y
1265- cdef double mean_x = 0 , mean_y = 0 , proddm_xy = 0
1424+ cdef double val_x, val_y, prev_x, prev_y, delta_x, delta_y
1425+ cdef double mean_x = 0 , mean_y = 0 , sproddm_xy = 0
12661426 cdef Py_ssize_t nobs = 0 , i, N = len (x)
12671427 cdef bint val_not_nan, prev_not_nan
12681428
@@ -1294,16 +1454,17 @@ def roll_cov(ndarray[double_t] x, ndarray[double_t] y, int win, int minp,
12941454 prev_y -= mean_y
12951455 mean_y += delta_y / nobs
12961456 val_y -= mean_y
1297- proddm_xy += (delta_x * (val_y + prev_y) + delta_y * (val_x + prev_x)) / 2
1457+ sproddm_xy += (delta_x * (val_y + prev_y) +
1458+ delta_y * (val_x + prev_x)) * 0.5
12981459 else :
12991460 # ...and not removing any
13001461 nobs += 1
13011462 delta_x = val_x - mean_x
13021463 mean_x += delta_x / nobs
13031464 delta_y = val_y - mean_y
13041465 mean_y += delta_y / nobs
1305- proddm_xy += ((val_x - mean_x) * delta_y +
1306- (val_y - mean_y) * delta_x) * 0.5
1466+ sproddm_xy += ((val_x - mean_x) * delta_y +
1467+ (val_y - mean_y) * delta_x) * 0.5
13071468 elif prev_not_nan:
13081469 # Adding no new observation, but removing one
13091470 nobs -= 1
@@ -1312,15 +1473,15 @@ def roll_cov(ndarray[double_t] x, ndarray[double_t] y, int win, int minp,
13121473 mean_x -= delta_x / nobs
13131474 delta_y = prev_y - mean_y
13141475 mean_y -= delta_y / nobs
1315- proddm_xy -= ((prev_x - mean_x) * delta_y +
1316- (prev_y - mean_y) * delta_x) * 0.5
1476+ sproddm_xy -= ((prev_x - mean_x) * delta_y +
1477+ (prev_y - mean_y) * delta_x) * 0.5
13171478 else :
1318- mean_x = mean_y = proddm_xy = 0
1479+ mean_x = mean_y = sproddm_xy = 0
13191480 # Covariance is unchanged if no observation is added or removed
13201481
13211482 # Finally, compute and write the rolling covariance to output
13221483 if nobs >= minp and nobs > ddof:
1323- output[i] = proddm_xy / (nobs - ddof)
1484+ output[i] = sproddm_xy / (nobs - ddof)
13241485 else :
13251486 output[i] = NaN
13261487
@@ -1334,7 +1495,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
13341495 """
13351496 Numerically stable implementation using Welford's method.
13361497 """
1337- cdef double val, prev, mean_x = 0 , ssqdm_x = 0 , nobs = 0 , delta, out
1498+ cdef double val, prev, mean_x = 0 , ssqdm_x = 0 , nobs = 0 , delta
13381499 cdef Py_ssize_t i
13391500 cdef Py_ssize_t N = len (input )
13401501
@@ -1350,7 +1511,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
13501511 # Adding one observation...
13511512 if prev == prev:
13521513 # ...and removing another
1353- delta val - prev
1514+ delta = val - prev
13541515 prev -= mean_x
13551516 mean_x += delta / nobs
13561517 val -= mean_x
@@ -1362,7 +1523,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
13621523 mean_x += delta / nobs
13631524 ssqdm_x += delta * (val - mean_x)
13641525 elif prev == prev:
1365- # adding no new observation, but removing one
1526+ # Adding no new observation, but removing one
13661527 nobs -= 1
13671528 if nobs:
13681529 delta = prev - mean_x
@@ -1372,13 +1533,14 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
13721533 mean_x = ssqdm_x = 0
13731534 # Variance is unchanged if no observation is added or removed
13741535
1536+ # Sum of squared differences from the mean must be non-negative
1537+ ssqdm_x = 0 if ssqdm_x < 0 else ssqdm_x
1538+
1539+ # Finally, compute and write the rolling variance to output
13751540 if nobs >= minp and nobs > ddof:
1376- out = ssqdm_x / (nobs - ddof)
1377- out = 0 if out < 0 else out
1541+ output[i] = ssqdm_x / (nobs - ddof)
13781542 else :
1379- out = NaN
1380-
1381- output[i] = out
1543+ output[i] = NaN
13821544
13831545 return output
13841546
0 commit comments