@@ -409,8 +409,9 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
409409
410410
411411cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
412- float64_t x, float64_t xx,
413- float64_t xxx) nogil:
412+ float64_t x, float64_t xx, float64_t xxx,
413+ int64_t num_consecutive_same_value
414+ ) nogil:
414415 cdef:
415416 float64_t result, dnobs
416417 float64_t A, B, C, R
@@ -421,6 +422,12 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
421422 B = xx / dnobs - A * A
422423 C = xxx / dnobs - A * A * A - 3 * A * B
423424
425+ if nobs < 3 :
426+ result = NaN
427+ # GH 42064 46431
428+ # uniform case, force result to be 0
429+ elif num_consecutive_same_value >= nobs:
430+ result = 0.0
424431 # #18044: with uniform distribution, floating issue will
425432 # cause B != 0. and cause the result is a very
426433 # large number.
@@ -430,7 +437,7 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
430437 # if the variance is less than 1e-14, it could be
431438 # treat as zero, here we follow the original
432439 # skew/kurt behaviour to check B <= 1e-14
433- if B <= 1e-14 or nobs < 3 :
440+ elif B <= 1e-14 :
434441 result = NaN
435442 else :
436443 R = sqrt(B)
@@ -447,7 +454,10 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
447454 float64_t * xxx,
448455 float64_t * compensation_x,
449456 float64_t * compensation_xx,
450- float64_t * compensation_xxx) nogil:
457+ float64_t * compensation_xxx,
458+ int64_t * num_consecutive_same_value,
459+ float64_t * prev_value,
460+ ) nogil:
451461 """ add a value from the skew calc """
452462 cdef:
453463 float64_t y, t
@@ -469,6 +479,14 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
469479 compensation_xxx[0 ] = t - xxx[0 ] - y
470480 xxx[0 ] = t
471481
482+ # GH#42064, record num of same values to remove floating point artifacts
483+ if val == prev_value[0 ]:
484+ num_consecutive_same_value[0 ] += 1
485+ else :
486+ # reset to 1 (include current value itself)
487+ num_consecutive_same_value[0 ] = 1
488+ prev_value[0 ] = val
489+
472490
473491cdef inline void remove_skew(float64_t val, int64_t * nobs,
474492 float64_t * x, float64_t * xx,
@@ -507,8 +525,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
507525 float64_t compensation_xx_add , compensation_xx_remove
508526 float64_t compensation_x_add , compensation_x_remove
509527 float64_t x , xx , xxx
528+ float64_t prev_value
510529 int64_t nobs = 0 , N = len (start), V = len (values), nobs_mean = 0
511- int64_t s , e
530+ int64_t s , e , num_consecutive_same_value
512531 ndarray[float64_t] output , mean_array , values_copy
513532 bint is_monotonic_increasing_bounds
514533
@@ -542,6 +561,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
542561 # never removed
543562 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
544563
564+ prev_value = values[s]
565+ num_consecutive_same_value = 0
566+
545567 compensation_xxx_add = compensation_xxx_remove = 0
546568 compensation_xx_add = compensation_xx_remove = 0
547569 compensation_x_add = compensation_x_remove = 0
@@ -550,7 +572,8 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
550572 for j in range (s, e):
551573 val = values_copy[j]
552574 add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
553- & compensation_xx_add, & compensation_xxx_add)
575+ & compensation_xx_add, & compensation_xxx_add,
576+ & num_consecutive_same_value, & prev_value)
554577
555578 else :
556579
@@ -566,9 +589,10 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
566589 for j in range (end[i - 1 ], e):
567590 val = values_copy[j]
568591 add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
569- & compensation_xx_add, & compensation_xxx_add)
592+ & compensation_xx_add, & compensation_xxx_add,
593+ & num_consecutive_same_value, & prev_value)
570594
571- output[i] = calc_skew(minp, nobs, x, xx, xxx)
595+ output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value )
572596
573597 if not is_monotonic_increasing_bounds:
574598 nobs = 0
@@ -584,35 +608,44 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
584608
585609cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs,
586610 float64_t x, float64_t xx,
587- float64_t xxx, float64_t xxxx) nogil:
611+ float64_t xxx, float64_t xxxx,
612+ int64_t num_consecutive_same_value,
613+ ) nogil:
588614 cdef:
589615 float64_t result, dnobs
590616 float64_t A, B, C, D, R, K
591617
592618 if nobs >= minp:
593- dnobs = < float64_t> nobs
594- A = x / dnobs
595- R = A * A
596- B = xx / dnobs - R
597- R = R * A
598- C = xxx / dnobs - R - 3 * A * B
599- R = R * A
600- D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
601-
602- # #18044: with uniform distribution, floating issue will
603- # cause B != 0. and cause the result is a very
604- # large number.
605- #
606- # in core/nanops.py nanskew/nankurt call the function
607- # _zero_out_fperr(m2) to fix floating error.
608- # if the variance is less than 1e-14, it could be
609- # treat as zero, here we follow the original
610- # skew/kurt behaviour to check B <= 1e-14
611- if B <= 1e-14 or nobs < 4 :
619+ if nobs < 4 :
612620 result = NaN
621+ # GH 42064 46431
622+ # uniform case, force result to be -3.
623+ elif num_consecutive_same_value >= nobs:
624+ result = - 3.
613625 else :
614- K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
615- result = K / ((dnobs - 2. ) * (dnobs - 3. ))
626+ dnobs = < float64_t> nobs
627+ A = x / dnobs
628+ R = A * A
629+ B = xx / dnobs - R
630+ R = R * A
631+ C = xxx / dnobs - R - 3 * A * B
632+ R = R * A
633+ D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
634+
635+ # #18044: with uniform distribution, floating issue will
636+ # cause B != 0. and cause the result is a very
637+ # large number.
638+ #
639+ # in core/nanops.py nanskew/nankurt call the function
640+ # _zero_out_fperr(m2) to fix floating error.
641+ # if the variance is less than 1e-14, it could be
642+ # treat as zero, here we follow the original
643+ # skew/kurt behaviour to check B <= 1e-14
644+ if B <= 1e-14 :
645+ result = NaN
646+ else :
647+ K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
648+ result = K / ((dnobs - 2. ) * (dnobs - 3. ))
616649 else :
617650 result = NaN
618651
@@ -625,7 +658,10 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
625658 float64_t * compensation_x,
626659 float64_t * compensation_xx,
627660 float64_t * compensation_xxx,
628- float64_t * compensation_xxxx) nogil:
661+ float64_t * compensation_xxxx,
662+ int64_t * num_consecutive_same_value,
663+ float64_t * prev_value
664+ ) nogil:
629665 """ add a value from the kurotic calc """
630666 cdef:
631667 float64_t y, t
@@ -651,6 +687,14 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
651687 compensation_xxxx[0 ] = t - xxxx[0 ] - y
652688 xxxx[0 ] = t
653689
690+ # GH#42064, record num of same values to remove floating point artifacts
691+ if val == prev_value[0 ]:
692+ num_consecutive_same_value[0 ] += 1
693+ else :
694+ # reset to 1 (include current value itself)
695+ num_consecutive_same_value[0 ] = 1
696+ prev_value[0 ] = val
697+
654698
655699cdef inline void remove_kurt(float64_t val, int64_t * nobs,
656700 float64_t * x, float64_t * xx,
@@ -695,7 +739,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
695739 float64_t compensation_xx_remove , compensation_xx_add
696740 float64_t compensation_x_remove , compensation_x_add
697741 float64_t x , xx , xxx , xxxx
698- int64_t nobs , s , e , N = len (start), V = len (values), nobs_mean = 0
742+ float64_t prev_value
743+ int64_t nobs , s , e , num_consecutive_same_value
744+ int64_t N = len (start), V = len (values), nobs_mean = 0
699745 ndarray[float64_t] output , values_copy
700746 bint is_monotonic_increasing_bounds
701747
@@ -729,6 +775,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
729775 # never removed
730776 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
731777
778+ prev_value = values[s]
779+ num_consecutive_same_value = 0
780+
732781 compensation_xxxx_add = compensation_xxxx_remove = 0
733782 compensation_xxx_remove = compensation_xxx_add = 0
734783 compensation_xx_remove = compensation_xx_add = 0
@@ -738,7 +787,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
738787 for j in range (s, e):
739788 add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
740789 & compensation_x_add, & compensation_xx_add,
741- & compensation_xxx_add, & compensation_xxxx_add)
790+ & compensation_xxx_add, & compensation_xxxx_add,
791+ & num_consecutive_same_value, & prev_value)
742792
743793 else :
744794
@@ -754,9 +804,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
754804 for j in range (end[i - 1 ], e):
755805 add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
756806 & compensation_x_add, & compensation_xx_add,
757- & compensation_xxx_add, & compensation_xxxx_add)
807+ & compensation_xxx_add, & compensation_xxxx_add,
808+ & num_consecutive_same_value, & prev_value)
758809
759- output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
810+ output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value )
760811
761812 if not is_monotonic_increasing_bounds:
762813 nobs = 0
0 commit comments