@@ -474,6 +474,13 @@ def nanargmin(values, axis=None, skipna=True):
474474
475475@disallow ('M8' , 'm8' )
476476def nanskew (values , axis = None , skipna = True ):
477+ """ Compute the sample skewness.
478+
479+ The statistic computed here is the adjusted Fisher-Pearson standardized
480+ moment coefficient G1. The algorithm computes this coefficient directly
481+ from the second and third central moment.
482+
483+ """
477484
478485 mask = isnull (values )
479486 if not is_float_dtype (values .dtype ):
@@ -486,32 +493,48 @@ def nanskew(values, axis=None, skipna=True):
486493 values = values .copy ()
487494 np .putmask (values , mask , 0 )
488495
489- typ = values .dtype .type
490- A = values .sum (axis ) / count
491- B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
492- C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
496+ mean = values .sum (axis , dtype = np .float64 ) / count
497+ if axis is not None :
498+ mean = np .expand_dims (mean , axis )
499+
500+ adjusted = values - mean
501+ if skipna :
502+ np .putmask (adjusted , mask , 0 )
503+ adjusted2 = adjusted ** 2
504+ adjusted3 = adjusted2 * adjusted
505+ m2 = adjusted2 .sum (axis , dtype = np .float64 )
506+ m3 = adjusted3 .sum (axis , dtype = np .float64 )
493507
494508 # floating point error
495- B = _zero_out_fperr (B )
496- C = _zero_out_fperr (C )
509+ m2 = _zero_out_fperr (m2 )
510+ m3 = _zero_out_fperr (m3 )
497511
498- result = ((np .sqrt (count * count - count ) * C ) /
499- ((count - typ (2 )) * np .sqrt (B )** typ (3 )))
512+ result = (count * (count - 1 ) ** 0.5 / (count - 2 )) * (m3 / m2 ** 1.5 )
513+
514+ dtype = values .dtype
515+ if is_float_dtype (dtype ):
516+ result = result .astype (dtype )
500517
501518 if isinstance (result , np .ndarray ):
502- result = np .where (B == 0 , 0 , result )
519+ result = np .where (m2 == 0 , 0 , result )
503520 result [count < 3 ] = np .nan
504521 return result
505522 else :
506- result = 0 if B == 0 else result
523+ result = 0 if m2 == 0 else result
507524 if count < 3 :
508525 return np .nan
509526 return result
510527
511528
512529@disallow ('M8' , 'm8' )
513530def nankurt (values , axis = None , skipna = True ):
531+ """ Compute the sample skewness.
532+
533+ The statistic computed here is the adjusted Fisher-Pearson standardized
534+ moment coefficient G2, computed directly from the second and fourth
535+ central moment.
514536
537+ """
515538 mask = isnull (values )
516539 if not is_float_dtype (values .dtype ):
517540 values = values .astype ('f8' )
@@ -523,30 +546,43 @@ def nankurt(values, axis=None, skipna=True):
523546 values = values .copy ()
524547 np .putmask (values , mask , 0 )
525548
526- typ = values .dtype .type
527- A = values .sum (axis ) / count
528- B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
529- C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
530- D = ((values ** 4 ).sum (axis ) / count - A ** typ (4 ) -
531- typ (6 ) * B * A * A - typ (4 ) * C * A )
549+ mean = values .sum (axis , dtype = np .float64 ) / count
550+ if axis is not None :
551+ mean = np .expand_dims (mean , axis )
552+
553+ adjusted = values - mean
554+ if skipna :
555+ np .putmask (adjusted , mask , 0 )
556+ adjusted2 = adjusted ** 2
557+ adjusted4 = adjusted2 ** 2
558+ m2 = adjusted2 .sum (axis , dtype = np .float64 )
559+ m4 = adjusted4 .sum (axis , dtype = np .float64 )
560+
561+ adj = 3 * (count - 1 ) ** 2 / ((count - 2 ) * (count - 3 ))
562+ numer = count * (count + 1 ) * (count - 1 ) * m4
563+ denom = (count - 2 ) * (count - 3 ) * m2 ** 2
564+ result = numer / denom - adj
532565
533- B = _zero_out_fperr (B )
534- D = _zero_out_fperr (D )
566+ # floating point error
567+ numer = _zero_out_fperr (numer )
568+ denom = _zero_out_fperr (denom )
535569
536- if not isinstance (B , np .ndarray ):
537- # if B is a scalar, check these corner cases first before doing
538- # division
570+ if not isinstance (denom , np .ndarray ):
571+ # if ``denom`` is a scalar, check these corner cases first before
572+ # doing division
539573 if count < 4 :
540574 return np .nan
541- if B == 0 :
575+ if denom == 0 :
542576 return 0
543577
544- result = (((count * count - typ (1 )) * D / (B * B ) - typ (3 ) *
545- ((count - typ (1 ))** typ (2 ))) / ((count - typ (2 )) *
546- (count - typ (3 ))))
578+ result = numer / denom - adj
579+
580+ dtype = values .dtype
581+ if is_float_dtype (dtype ):
582+ result = result .astype (dtype )
547583
548584 if isinstance (result , np .ndarray ):
549- result = np .where (B == 0 , 0 , result )
585+ result = np .where (denom == 0 , 0 , result )
550586 result [count < 4 ] = np .nan
551587
552588 return result
0 commit comments