From cfeca09f4d5eed8e3df4e16eec749b92b612571c Mon Sep 17 00:00:00 2001 From: SoulofAkuma Date: Sat, 15 Nov 2025 13:53:40 +0100 Subject: [PATCH 1/5] Fix rolling skew/kurt for low variance windows --- pandas/_libs/window/aggregations.pyx | 30 ++++++++++++------- pandas/tests/window/conftest.py | 8 +++++ pandas/tests/window/test_rolling_skew_kurt.py | 10 +++++++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 89530c6c9c46c..f8e43dfa78c81 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -494,12 +494,16 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, int64_t num_consecutive_same_value ) noexcept nogil: cdef: - float64_t result, dnobs + float64_t result, dnobs, m2_cutoff float64_t moments_ratio, correction if nobs >= minp: dnobs = nobs + # Relative cutoff as introduced in #62405 + # See the comment in nanops.nankurt for further explanation + m2_cutoff = ((EpsF64 * mean) ** 2) * dnobs + if nobs < 3: result = NaN # GH 42064 46431 @@ -512,10 +516,11 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, # # in core/nanops.py nanskew/nankurt call the function # _zero_out_fperr(m2) to fix floating error. 
- # if the variance is less than 1e-14, it could be - # treat as zero, here we follow the original - # skew/kurt behaviour to check m2 <= n * 1e-14 - elif m2 <= dnobs * 1e-14: + # if the variance is less than a relative cutoff value + # it could be treated as zero, here we follow the original + # skew/kurt behaviour to check + # m2 <= ((float64_machine_eps * mean) ** 2) * observations + elif m2 <= m2_cutoff: result = NaN else: moments_ratio = m3 / (m2 * sqrt(m2)) @@ -688,7 +693,7 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs, int64_t num_consecutive_same_value, ) noexcept nogil: cdef: - float64_t result, dnobs + float64_t result, dnobs, variance_cutoff float64_t A, B, C, D, R, K if nobs >= minp: @@ -708,16 +713,21 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs, R = R * A D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A + # Relative cutoff as introduced in #62405 + # See the comment in nanops.nankurt for further explanation + variance_cutoff = ((EpsF64 * B) ** 2) * dnobs + # #18044: with uniform distribution, floating issue will # cause B != 0. and cause the result is a very # large number. # # in core/nanops.py nanskew/nankurt call the function # _zero_out_fperr(m2) to fix floating error. - # if the variance is less than 1e-14, it could be - # treat as zero, here we follow the original - # skew/kurt behaviour to check B <= 1e-14 - if B <= 1e-14: + # if the variance is less than a relative cutoff value + # it could be treated as zero, here we follow the original + # skew/kurt behaviour to check + # m2 <= ((float64_machine_eps * mean) ** 2) * observations + if B <= variance_cutoff: result = NaN else: K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) 
** 2) diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index fe873b3b74254..be6ed65696404 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -108,6 +108,14 @@ def series(): series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) return series +@pytest.fixture +def low_variance_series(): + """Make a mocked low variance series as a fixture""" + arr = np.random.default_rng(505).normal(loc=0e0, scale=1e-8, size=100) + locs = np.arange(20, 40) + arr[locs] = np.nan + series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) + return series @pytest.fixture def frame(): diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 79c14f243e7cc..4db42b2b08589 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -25,6 +25,16 @@ def test_series(series, sp_func, roll_func): tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:])) +@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) +def test_low_variance_series(low_variance_series, sp_func, roll_func): + sp_stats = pytest.importorskip("scipy.stats") + + compare_func = partial(getattr(sp_stats, sp_func), bias=False) + result = getattr(low_variance_series.rolling(50), roll_func)() + assert isinstance(result, Series) + tm.assert_almost_equal(result.iloc[-1], compare_func(low_variance_series[-50:])) + + @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) def test_frame(raw, frame, sp_func, roll_func): sp_stats = pytest.importorskip("scipy.stats") From 4f3f6dcd4ffb92ece50f187a5774fd808eeff5db Mon Sep 17 00:00:00 2001 From: SoulofAkuma Date: Sat, 15 Nov 2025 14:21:04 +0100 Subject: [PATCH 2/5] Apply pre-commit hook formatting --- pandas/tests/window/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index be6ed65696404..8c3ea8df14a2e 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -108,6 +108,7 @@ def series(): series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) return series + @pytest.fixture def low_variance_series(): """Make a mocked low variance series as a fixture""" @@ -117,6 +118,7 @@ def low_variance_series(): series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) return series + @pytest.fixture def frame(): """Make mocked frame as fixture.""" From 833962324889f12d75d7ff3169d4fd4517f7b485 Mon Sep 17 00:00:00 2001 From: SoulofAkuma Date: Sat, 15 Nov 2025 15:47:45 +0100 Subject: [PATCH 3/5] Implemented review suggestions --- pandas/_libs/window/aggregations.pyx | 18 +----------------- pandas/tests/window/conftest.py | 10 ---------- pandas/tests/window/test_rolling_skew_kurt.py | 18 ++++++++++++++---- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index f8e43dfa78c81..26e326b40e908 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -494,34 +494,18 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, int64_t num_consecutive_same_value ) noexcept nogil: cdef: - float64_t result, dnobs, m2_cutoff + float64_t result, dnobs float64_t moments_ratio, correction if nobs >= minp: dnobs = nobs - # Relative cutoff as introduced in #62405 - # See the comment in nanops.nankurt for further explanation - m2_cutoff = ((EpsF64 * mean) ** 2) * dnobs - if nobs < 3: result = NaN # GH 42064 46431 # uniform case, force result to be 0 elif num_consecutive_same_value >= nobs: result = 0.0 - # #18044: with degenerate distribution, floating issue will - # cause m2 != 0. and cause the result is a very - # large number. 
- # - # in core/nanops.py nanskew/nankurt call the function - # _zero_out_fperr(m2) to fix floating error. - # if the variance is less than a relative cutoff value - # it could be treated as zero, here we follow the original - # skew/kurt behaviour to check - # m2 <= ((float64_machine_eps * mean) ** 2) * observations - elif m2 <= m2_cutoff: - result = NaN else: moments_ratio = m3 / (m2 * sqrt(m2)) correction = dnobs * sqrt((dnobs - 1)) / (dnobs - 2) diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 8c3ea8df14a2e..fe873b3b74254 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -109,16 +109,6 @@ def series(): return series -@pytest.fixture -def low_variance_series(): - """Make a mocked low variance series as a fixture""" - arr = np.random.default_rng(505).normal(loc=0e0, scale=1e-8, size=100) - locs = np.arange(20, 40) - arr[locs] = np.nan - series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) - return series - - @pytest.fixture def frame(): """Make mocked frame as fixture.""" diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 4db42b2b08589..3c54c7d5c47f1 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -1,3 +1,4 @@ +from datetime import datetime from functools import partial import numpy as np @@ -6,6 +7,7 @@ from pandas import ( DataFrame, Series, + bdate_range, concat, isna, notna, @@ -26,13 +28,21 @@ def test_series(series, sp_func, roll_func): @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) -def test_low_variance_series(low_variance_series, sp_func, roll_func): +def test_low_variance_series(sp_func, roll_func): sp_stats = pytest.importorskip("scipy.stats") + arr = np.random.default_rng(505).normal(loc=0e0, scale=1e-16, size=100) + locs = np.arange(20, 40) + arr[locs] = np.nan + low_variance_series = 
Series( + arr, index=bdate_range(datetime(2009, 1, 1), periods=100) + ) + compare_func = partial(getattr(sp_stats, sp_func), bias=False) - result = getattr(low_variance_series.rolling(50), roll_func)() - assert isinstance(result, Series) - tm.assert_almost_equal(result.iloc[-1], compare_func(low_variance_series[-50:])) + window = low_variance_series.rolling(50) + result = getattr(window, roll_func)() + expected = window.apply(compare_func) + tm.assert_almost_equal(result, expected) @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]]) From 48b0e16d9200c95c9537f97617a61d5a777e634b Mon Sep 17 00:00:00 2001 From: Mika Allert <58139975+SoulofAkuma@users.noreply.github.com> Date: Sat, 15 Nov 2025 17:17:07 +0100 Subject: [PATCH 4/5] Replace power operator with manual multiplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Álvaro Kothe --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 26e326b40e908..35200bd104735 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -699,7 +699,7 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs, # Relative cutoff as introduced in #62405 # See the comment in nanops.nankurt for further explanation - variance_cutoff = ((EpsF64 * B) ** 2) * dnobs + variance_cutoff = EpsF64 * EpsF64 * A * A * dnobs # #18044: with uniform distribution, floating issue will # cause B != 0. 
and cause the result is a very From 6430126e18019970f5fb5989cc9a3bda1f4bd9b5 Mon Sep 17 00:00:00 2001 From: SoulofAkuma Date: Sat, 15 Nov 2025 18:56:58 +0100 Subject: [PATCH 5/5] Added whatsnew entry --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c96bb7f663368..f72ddd476fbf4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1260,6 +1260,7 @@ Groupby/resample/rolling - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`) - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`) - Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`) +- Bug in :meth:`Rolling.skew` and :meth:`Rolling.kurt` producing ``NaN`` for low-variance inputs because an absolute variance cutoff was used as the numerical-stability check. The cutoff was removed from ``skew`` and replaced with a relative cutoff in ``kurt`` (:issue:`62946`) Reshaping ^^^^^^^^^