2828from pandas .core .dtypes .missing import notna
2929
3030import pandas .core .algorithms as algos
31+ from pandas .core .algorithms import unique
3132from pandas .core .arrays .categorical import factorize_from_iterable
3233from pandas .core .construction import ensure_wrapped_if_datetimelike
3334from pandas .core .frame import DataFrame
@@ -545,7 +546,7 @@ def _unstack_extension_series(series: Series, level, fill_value) -> DataFrame:
545546 return result
546547
547548
548- def stack (frame : DataFrame , level = - 1 , dropna : bool = True ):
549+ def stack (frame : DataFrame , level = - 1 , dropna : bool = True , sort : bool = True ):
549550 """
550551 Convert DataFrame to Series with multi-level Index. Columns become the
551552 second level of the resulting hierarchical index
@@ -567,7 +568,9 @@ def factorize(index):
567568 level_num = frame .columns ._get_level_number (level )
568569
569570 if isinstance (frame .columns , MultiIndex ):
570- return _stack_multi_columns (frame , level_num = level_num , dropna = dropna )
571+ return _stack_multi_columns (
572+ frame , level_num = level_num , dropna = dropna , sort = sort
573+ )
571574 elif isinstance (frame .index , MultiIndex ):
572575 new_levels = list (frame .index .levels )
573576 new_codes = [lab .repeat (K ) for lab in frame .index .codes ]
@@ -620,13 +623,13 @@ def factorize(index):
620623 return frame ._constructor_sliced (new_values , index = new_index )
621624
622625
623- def stack_multiple (frame : DataFrame , level , dropna : bool = True ):
626+ def stack_multiple (frame : DataFrame , level , dropna : bool = True , sort : bool = True ):
624627 # If all passed levels match up to column names, no
625628 # ambiguity about what to do
626629 if all (lev in frame .columns .names for lev in level ):
627630 result = frame
628631 for lev in level :
629- result = stack (result , lev , dropna = dropna )
632+ result = stack (result , lev , dropna = dropna , sort = sort )
630633
631634 # Otherwise, level numbers may change as each successive level is stacked
632635 elif all (isinstance (lev , int ) for lev in level ):
@@ -639,7 +642,7 @@ def stack_multiple(frame: DataFrame, level, dropna: bool = True):
639642
640643 while level :
641644 lev = level .pop (0 )
642- result = stack (result , lev , dropna = dropna )
645+ result = stack (result , lev , dropna = dropna , sort = sort )
643646 # Decrement all level numbers greater than current, as these
644647 # have now shifted down by one
645648 level = [v if v <= lev else v - 1 for v in level ]
@@ -681,7 +684,7 @@ def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex:
681684
682685
683686def _stack_multi_columns (
684- frame : DataFrame , level_num : int = - 1 , dropna : bool = True
687+ frame : DataFrame , level_num : int = - 1 , dropna : bool = True , sort : bool = True
685688) -> DataFrame :
686689 def _convert_level_number (level_num : int , columns : Index ):
687690 """
@@ -711,7 +714,7 @@ def _convert_level_number(level_num: int, columns: Index):
711714 roll_columns = roll_columns .swaplevel (lev1 , lev2 )
712715 this .columns = mi_cols = roll_columns
713716
714- if not mi_cols ._is_lexsorted ():
717+ if not mi_cols ._is_lexsorted () and sort :
715718 # Workaround the edge case where 0 is one of the column names,
716719 # which interferes with trying to sort based on the first
717720 # level
@@ -725,7 +728,9 @@ def _convert_level_number(level_num: int, columns: Index):
725728 # time to ravel the values
726729 new_data = {}
727730 level_vals = mi_cols .levels [- 1 ]
728- level_codes = sorted (set (mi_cols .codes [- 1 ]))
731+ level_codes = unique (mi_cols .codes [- 1 ])
732+ if sort :
733+ level_codes = np .sort (level_codes )
729734 level_vals_nan = level_vals .insert (len (level_vals ), None )
730735
731736 level_vals_used = np .take (level_vals_nan , level_codes )
0 commit comments