@@ -267,22 +267,103 @@ def match(to_match, values, na_sentinel=-1):
267267 return result
268268
269269
def unique(values):
    """
    Hash table-based unique. Uniques are returned in order
    of appearance. This does NOT sort.

    Significantly faster than numpy.unique. Includes NA values.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    unique values.
      - If the input is an Index, the return is an Index
      - If the input is a Categorical dtype, the return is a Categorical
      - If the input is a Series/ndarray, the return will be an ndarray

    Examples
    --------
    >>> pd.unique(pd.Series([2, 1, 3, 3]))
    array([2, 1, 3])

    >>> pd.unique(pd.Series([2] + [1] * 5))
    array([2, 1])

    >>> pd.unique(pd.Series([pd.Timestamp('20160101'),
    ...                      pd.Timestamp('20160101')]))
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
    ...                      pd.Timestamp('20160101', tz='US/Eastern')]))
    array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')],
          dtype=object)

    >>> pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
    ...                     pd.Timestamp('20160101', tz='US/Eastern')]))
    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
                  dtype='datetime64[ns, US/Eastern]', freq=None)

    >>> pd.unique(list('baabc'))
    array(['b', 'a', 'c'], dtype=object)

    An unordered Categorical will return categories in the
    order of appearance.

    >>> pd.unique(pd.Series(pd.Categorical(list('baabc'))))
    [b, a, c]
    Categories (3, object): [b, a, c]

    >>> pd.unique(pd.Series(pd.Categorical(list('baabc'),
    ...                                    categories=list('abc'))))
    [b, a, c]
    Categories (3, object): [b, a, c]

    An ordered Categorical preserves the category ordering.

    >>> pd.unique(pd.Series(pd.Categorical(list('baabc'),
    ...                                    categories=list('abc'),
    ...                                    ordered=True)))
    [b, a, c]
    Categories (3, object): [a < b < c]

    See Also
    --------
    pandas.Index.unique
    pandas.Series.unique

    """

    values = _ensure_arraylike(values)

    # categorical is a fast-path
    # this will coerce Categorical, CategoricalIndex,
    # and category dtypes Series to same return of Category
    if is_categorical_dtype(values):
        # Unwrap a container to the object it holds; inputs without a
        # ``values`` attribute are used as-is.  The attribute name must be
        # given without a leading dot: getattr looks the string up
        # literally, so a dotted name would always yield the default.
        values = getattr(values, 'values', values)
        return values.unique()

    # keep a handle on the input so the result can be rebuilt from it
    original = values
    htable, _, values, dtype, ndtype = _get_hashtable_algo(values)

    table = htable(len(values))
    uniques = table.unique(values)
    uniques = _reconstruct_data(uniques, dtype, original)

    if isinstance(original, ABCSeries) and is_datetime64tz_dtype(dtype):
        # we are special casing datetime64tz_dtype
        # to return an object array of tz-aware Timestamps

        # TODO: it must return DatetimeArray with tz in pandas 2.0
        uniques = uniques.asobject.values

    return uniques
283364
284365
# Alias: the previous name `unique1d` is kept bound to the same function
# object as `unique`, so code referring to either name calls the same code.
285- unique = unique1d
366+ unique1d = unique
286367
287368
288369def isin (comps , values ):
@@ -651,7 +732,7 @@ def mode(values):
651732 if is_categorical_dtype (values ):
652733
653734 if isinstance (values , Series ):
654- return Series (values .values .mode ())
735+ return Series (values .values .mode (), name = values . name )
655736 return values .mode ()
656737
657738 values , dtype , ndtype = _ensure_data (values )
0 commit comments