@@ -73,32 +73,32 @@ def _maybe_to_categorical(array):
7373Level codes are an array of integers which are the positions of the real
7474values in the categories array.
7575
76- There is not setter, used the other categorical methods and the item setter on
77- Categorical to change values in the categorical.
76+ There is no setter; use the other categorical methods and the normal item setter to change
77+ values in the categorical.
7878"""
7979
8080_categories_doc = """The categories of this categorical.
8181
8282Setting assigns new values to each category (effectively a rename of
8383each individual category).
8484
85- The assigned value has to be a list-like object. If the number of
86- category-items is less than number of category-items in the current category,
87- all category-items at a higher position are set to NaN. If the number of
88- category-items is more that the current number of category-items, new
89- (unused) categories are added at the end.
90-
91- To add category-items in between, use `reorder_categories`.
85+ The assigned value has to be a list-like object. All items must be unique and the number of items
86+ in the new categories must be the same as the number of items in the old categories.
9287
9388Raises
9489------
9590ValueError
96- If the new categories do not validate as categories
91+ If the new categories do not validate as categories or if the number of new categories is
92+ unequal to the number of old categories
9793
9894See also
9995--------
100- Categorical.reorder_categories
101- Categorical.remove_unused_categories
96+ rename_categories
97+ reorder_categories
98+ add_categories
99+ remove_categories
100+ remove_unused_categories
101+ set_categories
102102"""
103103class Categorical (PandasObject ):
104104
@@ -399,10 +399,9 @@ def _validate_categories(cls, categories):
399399 def _set_categories (self , categories ):
400400 """ Sets new categories """
401401 categories = self ._validate_categories (categories )
402-
403- if not self ._categories is None and len (categories ) < len (self ._categories ):
404- # remove all _codes which are larger
405- self ._codes [self ._codes >= len (categories )] = - 1
402+ if not self ._categories is None and len (categories ) != len (self ._categories ):
403+ raise ValueError ("new categories need to have the same number of items than the old "
404+ "categories!" )
406405 self ._categories = categories
407406
408407 def _get_categories (self ):
@@ -425,18 +424,118 @@ def _get_levels(self):
425424 # TODO: Remove after deprecation period in 2017/ after 0.18
426425 levels = property (fget = _get_levels , fset = _set_levels )
427426
427+ def set_categories (self , new_categories , ordered = None , rename = False , inplace = False ):
428+ """ Sets the categories to the specified new_categories.
429+
430+ `new_categories` can include new categories (which will result in unused categories) or
431+ or remove old categories (which results in values set to NaN). If `rename==True`,
432+ the categories will simple be renamed (less or more items than in old categories will
433+ result in values set to NaN or in unused categories respectively).
434+
435+ This method can be used to perform more than one action of adding, removing,
436+ and reordering simultaneously and is therefore faster than performing the individual steps
437+ via the more specialised methods.
438+
439+ On the other hand this methods does not do checks (e.g., whether the old categories are
440+ included in the new categories on a reorder), which can result in surprising changes, for
441+ example when using special string dtypes on python3, which does not considers a S1 string
442+ equal to a single char python string.
443+
444+ Raises
445+ ------
446+ ValueError
447+ If new_categories does not validate as categories
448+
449+ Parameters
450+ ----------
451+ new_categories : Index-like
452+ The categories in new order.
453+ ordered : boolean, optional
454+ Whether or not the categorical is treated as a ordered categorical. If not given,
455+ do not change the ordered information.
456+ rename : boolean (default: False)
457+ Whether or not the new_categories should be considered as a rename of the old
458+ categories or as reordered categories.
459+ inplace : boolean (default: False)
460+ Whether or not to reorder the categories inplace or return a copy of this categorical
461+ with reordered categories.
462+
463+ Returns
464+ -------
465+ cat : Categorical with reordered categories or None if inplace.
466+
467+ See also
468+ --------
469+ rename_categories
470+ reorder_categories
471+ add_categories
472+ remove_categories
473+ remove_unused_categories
474+ """
475+ new_categories = self ._validate_categories (new_categories )
476+ cat = self if inplace else self .copy ()
477+ if rename :
478+ if not cat ._categories is None and len (new_categories ) < len (cat ._categories ):
479+ # remove all _codes which are larger and set to -1/NaN
480+ self ._codes [self ._codes >= len (new_categories )] = - 1
481+ cat ._categories = new_categories
482+ else :
483+ values = cat .__array__ ()
484+ cat ._codes = _get_codes_for_values (values , new_categories )
485+ cat ._categories = new_categories
486+
487+ if not ordered is None :
488+ cat .ordered = ordered
489+
490+ if not inplace :
491+ return cat
492+
493+ def rename_categories (self , new_categories , inplace = False ):
494+ """ Renames categories.
495+
496+ The new categories has to be a list-like object. All items must be unique and the number of
497+ items in the new categories must be the same as the number of items in the old categories.
498+
499+ Raises
500+ ------
501+ ValueError
502+ If the new categories do not have the same number of items than the current categories
503+ or do not validate as categories
504+
505+ Parameters
506+ ----------
507+ new_categories : Index-like
508+ The renamed categories.
509+ inplace : boolean (default: False)
510+ Whether or not to rename the categories inplace or return a copy of this categorical
511+ with renamed categories.
512+
513+ Returns
514+ -------
515+ cat : Categorical with renamed categories added or None if inplace.
516+
517+ See also
518+ --------
519+ reorder_categories
520+ add_categories
521+ remove_categories
522+ remove_unused_categories
523+ set_categories
524+ """
525+ cat = self if inplace else self .copy ()
526+ cat .categories = new_categories
527+ if not inplace :
528+ return cat
428529
429530 def reorder_categories (self , new_categories , ordered = None , inplace = False ):
430531 """ Reorders categories as specified in new_categories.
431532
432- `new_categories` do not need to include all old categories and can also include new
433- category items. All old categories not in new categories are replaced by NaN. In
434- contrast to assigning to `categories`, new category items can be in arbitrary positions.
533+ `new_categories` need to include all old categories and no new category items.
435534
436535 Raises
437536 ------
438537 ValueError
439- If the new categories do not contain all old category items
538+ If the new categories do not contain all old category items or any new ones
440539
441540 Parameters
442541 ----------
@@ -445,38 +544,131 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False):
445544 ordered : boolean, optional
446545 Whether or not the categorical is treated as a ordered categorical. If not given,
447546 do not change the ordered information.
448- inplace : bool (default: False)
547+ inplace : boolean (default: False)
449548 Whether or not to reorder the categories inplace or return a copy of this categorical
450549 with reordered categories.
451550
452551 Returns
453552 -------
454553 cat : Categorical with reordered categories or None if inplace.
554+
555+ See also
556+ --------
557+ rename_categories
558+ add_categories
559+ remove_categories
560+ remove_unused_categories
561+ set_categories
455562 """
456- new_categories = self ._validate_categories (new_categories )
563+ if set (self ._categories ) != set (new_categories ):
564+ raise ValueError ("items in new_categories are not the same as in old categories" )
565+ return self .set_categories (new_categories , ordered = ordered , inplace = inplace )
566+
567+ def add_categories (self , new_categories , inplace = False ):
568+ """ Add new categories.
457569
570+ `new_categories` will be included at the last/highest place in the categories and will be
571+ unused directly after this call.
572+
573+ Raises
574+ ------
575+ ValueError
576+ If the new categories include old categories or do not validate as categories
577+
578+ Parameters
579+ ----------
580+ new_categories : category or list-like of category
581+ The new categories to be included.
582+ inplace : boolean (default: False)
583+ Whether or not to add the categories inplace or return a copy of this categorical
584+ with added categories.
585+
586+ Returns
587+ -------
588+ cat : Categorical with new categories added or None if inplace.
589+
590+ See also
591+ --------
592+ rename_categories
593+ reorder_categories
594+ remove_categories
595+ remove_unused_categories
596+ set_categories
597+ """
598+ if not com .is_list_like (new_categories ):
599+ new_categories = [new_categories ]
600+ already_included = set (new_categories ) & set (self ._categories )
601+ if len (already_included ) != 0 :
602+ msg = "new categories must not include old categories: %s" % str (already_included )
603+ raise ValueError (msg )
604+ new_categories = list (self ._categories ) + (new_categories )
605+ new_categories = self ._validate_categories (new_categories )
458606 cat = self if inplace else self .copy ()
459- values = cat .__array__ ()
460- cat ._codes = _get_codes_for_values (values , new_categories )
461607 cat ._categories = new_categories
462- if not ordered is None :
463- cat .ordered = ordered
464608 if not inplace :
465609 return cat
466610
611+ def remove_categories (self , removals , inplace = False ):
612+ """ Removes the specified categories.
613+
614+ `removals` must be included in the old categories. Values which were in the removed
615+ categories will be set to NaN
616+
617+ Raises
618+ ------
619+ ValueError
620+ If the removals are not contained in the categories
621+
622+ Parameters
623+ ----------
624+ removals : category or list of categories
625+ The categories which should be removed.
626+ inplace : boolean (default: False)
627+ Whether or not to remove the categories inplace or return a copy of this categorical
628+ with removed categories.
629+
630+ Returns
631+ -------
632+ cat : Categorical with removed categories or None if inplace.
633+
634+ See also
635+ --------
636+ rename_categories
637+ reorder_categories
638+ add_categories
639+ remove_unused_categories
640+ set_categories
641+ """
642+ if not com .is_list_like (removals ):
643+ removals = [removals ]
644+ not_included = set (removals ) - set (self ._categories )
645+ if len (not_included ) != 0 :
646+ raise ValueError ("removals must all be in old categories: %s" % str (not_included ))
647+ new_categories = set (self ._categories ) - set (removals )
648+ return self .set_categories (new_categories , ordered = self .ordered , rename = False ,
649+ inplace = inplace )
650+
651+
467652 def remove_unused_categories (self , inplace = False ):
468653 """ Removes categories which are not used.
469654
470655 Parameters
471656 ----------
472- inplace : bool (default: False)
657+ inplace : boolean (default: False)
473658 Whether or not to drop unused categories inplace or return a copy of this categorical
474659 with unused categories dropped.
475660
476661 Returns
477662 -------
478663 cat : Categorical with unused categories dropped or None if inplace.
479664
665+ See also
666+ --------
667+ rename_categories
668+ reorder_categories
669+ add_categories
670+ remove_categories
671+ set_categories
480672 """
481673 cat = self if inplace else self .copy ()
482674 _used = sorted (np .unique (cat ._codes ))
@@ -1176,8 +1368,12 @@ def _delegate_method(self, name, *args, **kwargs):
11761368 accessors = ["categories" , "ordered" ],
11771369 typ = 'property' )
# Register the Categorical methods listed below with CategoricalAccessor as
# delegated accessors of type 'method'.
CategoricalAccessor._add_delegate_accessors(delegate=Categorical,
                                            accessors=["rename_categories",
                                                       "reorder_categories",
                                                       "add_categories",
                                                       "remove_categories",
                                                       "remove_unused_categories",
                                                       "set_categories"],
                                            typ='method')
11821378
11831379##### utility routines #####
0 commit comments