1717
# Alias for the builtin ``str`` type.
str_type = str

# GH26403: sentinel used as the default value of ``ordered`` in the
# CategoricalDtype constructor.  Comparing against it by identity lets the
# constructor tell "argument omitted" apart from an explicit ``ordered=None``.
ordered_sentinel = object()  # type: object

# Annotation for ``ordered`` arguments; ``object`` is included only so that
# the sentinel above is an accepted value.
# TODO(GH26403): Replace with Optional[bool] or bool
OrderedType = Union[None, bool, object]
26+
2027
2128def register_extension_dtype (cls : Type [ExtensionDtype ],
2229 ) -> Type [ExtensionDtype ]:
@@ -214,7 +221,9 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
214221 _metadata = ('categories' , 'ordered' )
215222 _cache = {} # type: Dict[str_type, PandasExtensionDtype]
216223
def __init__(self,
             categories=None,
             ordered: OrderedType = ordered_sentinel):
    """
    Initialise the dtype from optional categories and orderedness.

    Parameters
    ----------
    categories : optional
        Passed through to ``_finalize`` unchanged; ``None`` by default.
    ordered : bool, None or the ``ordered_sentinel`` object
        Defaults to ``ordered_sentinel`` rather than ``None`` so that an
        omitted argument can be distinguished from an explicitly passed
        ``ordered=None``.
    """
    # All validation and attribute assignment is delegated to _finalize;
    # the public constructor never takes the fast path.
    self._finalize(categories, ordered, fastpath=False)
219228
220229 @classmethod
@@ -230,7 +239,7 @@ def _from_fastpath(cls,
230239 def _from_categorical_dtype (cls ,
231240 dtype : 'CategoricalDtype' ,
232241 categories = None ,
233- ordered : Optional [ bool ] = None ,
242+ ordered : OrderedType = None ,
234243 ) -> 'CategoricalDtype' :
235244 if categories is ordered is None :
236245 return dtype
@@ -330,19 +339,20 @@ def _from_values_or_dtype(cls,
330339
def _finalize(self,
              categories,
              ordered: OrderedType,
              fastpath: bool = False,
              ) -> None:
    """
    Validate the inputs and store them on the instance.

    Parameters
    ----------
    categories : optional
        When not ``None``, run through ``validate_categories`` (with the
        given ``fastpath`` flag) and the result is stored.
    ordered : bool, None or the ``ordered_sentinel`` object
        Validated with ``validate_ordered`` only when it is a real value,
        i.e. neither ``None`` nor the sentinel.
    fastpath : bool, default False
        Forwarded to ``validate_categories``.

    Notes
    -----
    Sets ``_categories``, ``_ordered`` and ``_ordered_from_sentinel``.
    The sentinel is never stored in ``_ordered``; it is recorded as
    ``None`` and remembered separately via ``_ordered_from_sentinel``.
    """
    # Identity check: the sentinel is a plain ``object()`` instance.
    from_sentinel = ordered is ordered_sentinel

    if ordered is not None and not from_sentinel:
        self.validate_ordered(ordered)

    if categories is not None:
        categories = self.validate_categories(categories, fastpath=fastpath)

    self._categories = categories
    # An omitted ``ordered`` is stored as None, same as an explicit None.
    self._ordered = None if from_sentinel else ordered
    # Remember whether None came from the default rather than the caller.
    self._ordered_from_sentinel = from_sentinel
346356
347357 def __setstate__ (self , state : Dict [str_type , Any ]) -> None :
348358 # for pickle compat. __get_state__ is defined in the
@@ -355,12 +365,12 @@ def __hash__(self) -> int:
355365 # _hash_categories returns a uint64, so use the negative
356366 # space for when we have unknown categories to avoid a conflict
357367 if self .categories is None :
358- if self .ordered :
368+ if self ._ordered :
359369 return - 1
360370 else :
361371 return - 2
362372 # We *do* want to include the real self.ordered here
363- return int (self ._hash_categories (self .categories , self .ordered ))
373+ return int (self ._hash_categories (self .categories , self ._ordered ))
364374
365375 def __eq__ (self , other : Any ) -> bool :
366376 """
@@ -379,7 +389,7 @@ def __eq__(self, other: Any) -> bool:
379389 return other == self .name
380390 elif other is self :
381391 return True
382- elif not (hasattr (other , 'ordered ' ) and hasattr (other , 'categories' )):
392+ elif not (hasattr (other , '_ordered ' ) and hasattr (other , 'categories' )):
383393 return False
384394 elif self .categories is None or other .categories is None :
385395 # We're forced into a suboptimal corner thanks to math and
@@ -388,10 +398,10 @@ def __eq__(self, other: Any) -> bool:
388398 # CDT(., .) = CDT(None, False) and *all*
389399 # CDT(., .) = CDT(None, True).
390400 return True
391- elif self .ordered or other .ordered :
401+ elif self ._ordered or other ._ordered :
392402 # At least one has ordered=True; equal if both have ordered=True
393403 # and the same values for categories in the same order.
394- return ((self .ordered == other .ordered ) and
404+ return ((self ._ordered == other ._ordered ) and
395405 self .categories .equals (other .categories ))
396406 else :
397407 # Neither has ordered=True; equal if both have the same categories,
@@ -406,10 +416,10 @@ def __repr__(self):
406416 data = "None, "
407417 else :
408418 data = self .categories ._format_data (name = self .__class__ .__name__ )
409- return tpl .format (data , self .ordered )
419+ return tpl .format (data , self ._ordered )
410420
411421 @staticmethod
412- def _hash_categories (categories , ordered : Optional [ bool ] = True ) -> int :
422+ def _hash_categories (categories , ordered : OrderedType = True ) -> int :
413423 from pandas .core .util .hashing import (
414424 hash_array , _combine_hash_arrays , hash_tuples
415425 )
@@ -459,7 +469,7 @@ def construct_array_type(cls):
459469 return Categorical
460470
461471 @staticmethod
462- def validate_ordered (ordered : bool ) -> None :
472+ def validate_ordered (ordered : OrderedType ) -> None :
463473 """
464474 Validates that we have a valid ordered parameter. If
465475 it is not a boolean, a TypeError will be raised.
@@ -534,17 +544,25 @@ def update_dtype(self, dtype: 'CategoricalDtype') -> 'CategoricalDtype':
534544 msg = ('a CategoricalDtype must be passed to perform an update, '
535545 'got {dtype!r}' ).format (dtype = dtype )
536546 raise ValueError (msg )
537- elif dtype .categories is not None and dtype .ordered is self .ordered :
538- return dtype
539547
540548 # dtype is CDT: keep current categories/ordered if None
541549 new_categories = dtype .categories
542550 if new_categories is None :
543551 new_categories = self .categories
544552
545- new_ordered = dtype .ordered
553+ new_ordered = dtype ._ordered
554+ new_ordered_from_sentinel = dtype ._ordered_from_sentinel
546555 if new_ordered is None :
547- new_ordered = self .ordered
556+ # maintain existing ordered if new dtype has ordered=None
557+ new_ordered = self ._ordered
558+ if self ._ordered and new_ordered_from_sentinel :
559+ # only warn if we'd actually change the existing behavior
560+ msg = ("Constructing a CategoricalDtype without specifying "
561+ "`ordered` will default to `ordered=False` in a future "
562+ "version, which will cause the resulting categorical's "
563+ "`ordered` attribute to change to False; `ordered=True`"
564+ " must be explicitly passed in order to be retained" )
565+ warnings .warn (msg , FutureWarning , stacklevel = 3 )
548566
549567 return CategoricalDtype (new_categories , new_ordered )
550568
@@ -556,10 +574,18 @@ def categories(self):
556574 return self ._categories
557575
@property
def ordered(self) -> OrderedType:
    """
    Whether the categories have an ordered relationship.

    Emits a ``FutureWarning`` on access when the stored value is ``None``
    and ``_ordered_from_sentinel`` is set, i.e. ``ordered`` was left at
    its default instead of being passed explicitly.
    """
    # TODO: remove if block when ordered=None as default is deprecated
    if self._ordered_from_sentinel and self._ordered is None:
        # warn when accessing ordered if ordered=None and None was not
        # explicitly passed to the constructor
        msg = ("Constructing a CategoricalDtype without specifying "
               "`ordered` will default to `ordered=False` in a future "
               "version; `ordered=None` must be explicitly passed.")
        # stacklevel=2 attributes the warning to the code reading
        # ``.ordered`` rather than to this property.
        warnings.warn(msg, FutureWarning, stacklevel=2)
    return self._ordered
564590
565591 @property
0 commit comments