Skip to content

Commit cfeca09

Browse files
committed
Fix rolling skew/kurt for low variance windows
1 parent fa5b90a commit cfeca09

File tree

3 files changed

+38
-10
lines changed

3 files changed

+38
-10
lines changed

pandas/_libs/window/aggregations.pyx

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -494,12 +494,16 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
494494
int64_t num_consecutive_same_value
495495
) noexcept nogil:
496496
cdef:
497-
float64_t result, dnobs
497+
float64_t result, dnobs, m2_cutoff
498498
float64_t moments_ratio, correction
499499

500500
if nobs >= minp:
501501
dnobs = <float64_t>nobs
502502

503+
# Relative cutoff as introduced in #62405
504+
# See the comment in nanops.nankurt for further explanation
505+
m2_cutoff = ((EpsF64 * mean) ** 2) * dnobs
506+
503507
if nobs < 3:
504508
result = NaN
505509
# GH 42064 46431
@@ -512,10 +516,11 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
512516
#
513517
# in core/nanops.py nanskew/nankurt call the function
514518
# _zero_out_fperr(m2) to fix floating error.
515-
# if the variance is less than 1e-14, it could be
516-
# treat as zero, here we follow the original
517-
# skew/kurt behaviour to check m2 <= n * 1e-14
518-
elif m2 <= dnobs * 1e-14:
519+
# if the variance is less than a relative cutoff value
520+
# it could be treated as zero, here we follow the original
521+
# skew/kurt behaviour to check
522+
# m2 <= ((float64_machine_eps * mean) ** 2) * observations
523+
elif m2 <= m2_cutoff:
519524
result = NaN
520525
else:
521526
moments_ratio = m3 / (m2 * sqrt(m2))
@@ -688,7 +693,7 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
688693
int64_t num_consecutive_same_value,
689694
) noexcept nogil:
690695
cdef:
691-
float64_t result, dnobs
696+
float64_t result, dnobs, variance_cutoff
692697
float64_t A, B, C, D, R, K
693698

694699
if nobs >= minp:
@@ -708,16 +713,21 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
708713
R = R * A
709714
D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
710715

716+
# Relative cutoff as introduced in #62405
717+
# See the comment in nanops.nankurt for further explanation
718+
variance_cutoff = ((EpsF64 * B) ** 2) * dnobs
719+
711720
# #18044: with uniform distribution, floating-point error will
712721
# cause B != 0. and cause the result to be a very
713722
# large number.
714723
#
715724
# in core/nanops.py nanskew/nankurt call the function
716725
# _zero_out_fperr(m2) to fix floating error.
717-
# if the variance is less than 1e-14, it could be
718-
# treat as zero, here we follow the original
719-
# skew/kurt behaviour to check B <= 1e-14
720-
if B <= 1e-14:
726+
# if the variance is less than a relative cutoff value
727+
# it could be treated as zero, here we follow the original
728+
# skew/kurt behaviour to check
729+
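# m2 <= ((float64_machine_eps * mean) ** 2) * observations
# NOTE(review): variance_cutoff above is computed from B itself rather than
# from a mean term as this formula describes -- confirm which is intended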
730+
if B <= variance_cutoff:
721731
result = NaN
722732
else:
723733
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)

pandas/tests/window/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ def series():
108108
series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100))
109109
return series
110110

111+
@pytest.fixture
112+
def low_variance_series():
113+
"""Make a mocked low variance series as a fixture"""
114+
arr = np.random.default_rng(505).normal(loc=0e0, scale=1e-8, size=100)
115+
locs = np.arange(20, 40)
116+
arr[locs] = np.nan
117+
series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100))
118+
return series
111119

112120
@pytest.fixture
113121
def frame():

pandas/tests/window/test_rolling_skew_kurt.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ def test_series(series, sp_func, roll_func):
2525
tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))
2626

2727

28+
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
29+
def test_low_variance_series(low_variance_series, sp_func, roll_func):
30+
sp_stats = pytest.importorskip("scipy.stats")
31+
32+
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
33+
result = getattr(low_variance_series.rolling(50), roll_func)()
34+
assert isinstance(result, Series)
35+
tm.assert_almost_equal(result.iloc[-1], compare_func(low_variance_series[-50:]))
36+
37+
2838
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
2939
def test_frame(raw, frame, sp_func, roll_func):
3040
sp_stats = pytest.importorskip("scipy.stats")

0 commit comments

Comments
 (0)