@@ -124,96 +124,12 @@ def group_last_object(ndarray[object, ndim=2] out,
124124 out[i, j] = resx[i, j]
125125
126126
def group_rank_object(ndarray[float64_t, ndim=2] out,
                      ndarray[object, ndim=2] values,
                      ndarray[int64_t] labels,
                      bint is_datetimelike, object ties_method,
                      bint ascending, bint pct, object na_option):
    """
    Reject group-wise ranking of object-dtype values.

    Every argument is accepted but ignored: the function raises
    unconditionally and never reads ``values``/``labels`` or writes
    to ``out``.

    Parameters
    ----------
    out : ndarray[float64_t, ndim=2]
        Unused.
    values : ndarray[object, ndim=2]
        Unused.
    labels : ndarray[int64_t]
        Unused.
    is_datetimelike : bint
        Unused.
    ties_method : object
        Unused.
    ascending : bint
        Unused.
    pct : bint
        Unused.
    na_option : object
        Unused.

    Raises
    ------
    ValueError
        Always; ranking is not supported for object dtypes.
    """
    # NOTE(review): the full signature is presumably retained so that
    # callers which look this function up by name and pass the standard
    # rank arguments get this error rather than a lookup or arity
    # failure -- confirm against the caller.
    raise ValueError("rank not supported for object dtypes")
217133
218134
219135cdef inline float64_t median_linear(float64_t* a, int n) nogil:
0 commit comments