@@ -673,10 +673,45 @@ def group_mean(floating[:, ::1] out,
673673 int64_t[::1] counts ,
674674 ndarray[floating , ndim = 2 ] values,
675675 const intp_t[::1] labels ,
676- Py_ssize_t min_count = - 1 ) -> None:
676+ Py_ssize_t min_count = - 1 ,
677+ bint is_datetimelike = False ,
678+ const uint8_t[:, ::1] mask = None ,
679+ uint8_t[:, ::1] result_mask = None
680+ ) -> None:
681+ """
682+ Compute the mean per label given a label assignment for each value.
683+ NaN values are ignored.
684+
685+ Parameters
686+ ----------
687+ out : np.ndarray[floating]
688+ Values into which this method will write its results.
689+ counts : np.ndarray[int64]
690+ A zeroed array of the same shape as labels,
691+ populated by group sizes during the algorithm.
692+ values : np.ndarray[floating]
693+ 2-d array of the values to find the mean of.
694+ labels : np.ndarray[np.intp]
695+ Array containing a unique label for each group, with its
696+ ordering matching up to the corresponding record in `values`.
697+ min_count : Py_ssize_t
698+ Only used in add and prod. Always -1.
699+ is_datetimelike : bool
700+ True if `values` contains datetime-like entries.
701+ mask : ndarray[bool , ndim = 2 ], optional
702+ Not used.
703+ result_mask : ndarray[bool , ndim = 2 ], optional
704+ Not used.
705+
706+ Notes
707+ -----
708+ This method modifies the `out` parameter rather than returning an object.
709+ `counts` is modified to hold group sizes.
710+ """
711+
677712 cdef:
678713 Py_ssize_t i , j , N , K , lab , ncounts = len (counts)
679- floating val , count , y , t
714+ floating val , count , y , t , nan_val
680715 floating[:, ::1] sumx , compensation
681716 int64_t[:, ::1] nobs
682717 Py_ssize_t len_values = len (values), len_labels = len (labels)
@@ -686,12 +721,13 @@ def group_mean(floating[:, ::1] out,
686721 if len_values != len_labels:
687722 raise ValueError("len(index ) != len(labels )")
688723
689- nobs = np.zeros((< object > out).shape, dtype = np.int64)
690724 # the below is equivalent to `np.zeros_like(out )` but faster
725+ nobs = np.zeros((< object > out).shape, dtype = np.int64)
691726 sumx = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
692727 compensation = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
693728
694729 N , K = (< object > values).shape
730+ nan_val = NPY_NAT if is_datetimelike else NAN
695731
696732 with nogil:
697733 for i in range(N ):
@@ -703,7 +739,7 @@ def group_mean(floating[:, ::1] out,
703739 for j in range (K):
704740 val = values[i, j]
705741 # not nan
706- if val == val:
742+ if val == val and not (is_datetimelike and val == NPY_NAT) :
707743 nobs[lab, j] += 1
708744 y = val - compensation[lab, j]
709745 t = sumx[lab, j] + y
@@ -714,7 +750,7 @@ def group_mean(floating[:, ::1] out,
714750 for j in range (K):
715751 count = nobs[i, j]
716752 if nobs[i, j] == 0 :
717- out[i, j] = NAN
753+ out[i, j] = nan_val
718754 else :
719755 out[i, j] = sumx[i, j] / count
720756
0 commit comments