@@ -207,7 +207,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
207207 if fastpath :
208208 # fast path
209209 self ._codes = _coerce_indexer_dtype (values , categories )
210- self .categories = categories
210+ self ._categories = self . _validate_categories ( categories , fastpath = isinstance ( categories , ABCIndexClass ))
211211 self ._ordered = ordered
212212 return
213213
@@ -274,6 +274,8 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
274274 ### FIXME ####
275275 raise NotImplementedError ("> 1 ndim Categorical are not supported at this time" )
276276
277+ categories = self ._validate_categories (categories )
278+
277279 else :
278280 # there were two ways if categories are present
279281 # - the old one, where each value is a int pointer to the levels array -> not anymore
@@ -282,7 +284,6 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
282284
283285 # make sure that we always have the same type here, no matter what we get passed in
284286 categories = self ._validate_categories (categories )
285-
286287 codes = _get_codes_for_values (values , categories )
287288
288289 # TODO: check for old style usage. These warnings should be removed after 0.18/ in 2016
@@ -295,7 +296,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
295296 "'Categorical.from_codes(codes, categories)'?" , RuntimeWarning , stacklevel = 2 )
296297
297298 self .set_ordered (ordered or False , inplace = True )
298- self .categories = categories
299+ self ._categories = categories
299300 self ._codes = _coerce_indexer_dtype (codes , categories )
300301
301302 def copy (self ):
@@ -421,9 +422,15 @@ def _get_labels(self):
421422 _categories = None
422423
423424 @classmethod
424- def _validate_categories (cls , categories ):
425+ def _validate_categories (cls , categories , fastpath = False ):
425426 """
426427 Validates that we have good categories
428+
429+ Parameters
430+ ----------
431+ fastpath : boolean (default: False)
432+ Don't perform validation of the categories for uniqueness or nulls
433+
427434 """
428435 if not isinstance (categories , ABCIndexClass ):
429436 dtype = None
@@ -439,16 +446,40 @@ def _validate_categories(cls, categories):
439446
440447 from pandas import Index
441448 categories = Index (categories , dtype = dtype )
442- if not categories .is_unique :
443- raise ValueError ('Categorical categories must be unique' )
449+
450+ if not fastpath :
451+
452+ # check properties of the categories
453+ # we don't allow NaNs in the categories themselves
454+
455+ if categories .hasnans :
456+ # NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
457+ msg = ('\n Setting NaNs in `categories` is deprecated and '
458+ 'will be removed in a future version of pandas.' )
459+ warn (msg , FutureWarning , stacklevel = 5 )
460+
461+ # categories must be unique
462+
463+ if not categories .is_unique :
464+ raise ValueError ('Categorical categories must be unique' )
465+
444466 return categories
445467
446- def _set_categories (self , categories ):
447- """ Sets new categories """
448- categories = self ._validate_categories (categories )
449- if not self ._categories is None and len (categories ) != len (self ._categories ):
468+ def _set_categories (self , categories , fastpath = False ):
469+ """ Sets new categories
470+
471+ Parameters
472+ ----------
473+ fastpath : boolean (default: False)
474+ Don't perform validation of the categories for uniqueness or nulls
475+
476+ """
477+
478+ categories = self ._validate_categories (categories , fastpath = fastpath )
479+ if not fastpath and not self ._categories is None and len (categories ) != len (self ._categories ):
450480 raise ValueError ("new categories need to have the same number of items than the old "
451481 "categories!" )
482+
452483 self ._categories = categories
453484
454485 def _get_categories (self ):
@@ -581,11 +612,10 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
581612 if not cat ._categories is None and len (new_categories ) < len (cat ._categories ):
582613 # remove all _codes which are larger and set to -1/NaN
583614 self ._codes [self ._codes >= len (new_categories )] = - 1
584- cat ._categories = new_categories
585615 else :
586616 values = cat .__array__ ()
587617 cat ._codes = _get_codes_for_values (values , new_categories )
588- cat ._categories = new_categories
618+ cat ._categories = new_categories
589619
590620 if ordered is None :
591621 ordered = self .ordered
@@ -706,9 +736,8 @@ def add_categories(self, new_categories, inplace=False):
706736 msg = "new categories must not include old categories: %s" % str (already_included )
707737 raise ValueError (msg )
708738 new_categories = list (self ._categories ) + list (new_categories )
709- new_categories = self ._validate_categories (new_categories )
710739 cat = self if inplace else self .copy ()
711- cat ._categories = new_categories
740+ cat ._categories = self . _validate_categories ( new_categories )
712741 cat ._codes = _coerce_indexer_dtype (cat ._codes , new_categories )
713742 if not inplace :
714743 return cat
@@ -1171,7 +1200,7 @@ def order(self, inplace=False, ascending=True, na_position='last'):
11711200 Category.sort
11721201 """
11731202 warn ("order is deprecated, use sort_values(...)" ,
1174- FutureWarning , stacklevel = 2 )
1203+ FutureWarning , stacklevel = 3 )
11751204 return self .sort_values (inplace = inplace , ascending = ascending , na_position = na_position )
11761205
11771206 def sort (self , inplace = True , ascending = True , na_position = 'last' ):
0 commit comments